In [1]:
## All to folder
## generate prompt
# !ls data
# import time
# time.sleep(60*30)

# Try to do:
# torch.cuda.empty_cache()

In [2]:
import os
import json

import transformers
from peft import PeftModel
from transformers import LlamaForCausalLM as LLaMAForCausalLM
from transformers import LlamaTokenizer as LLaMATokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model
from datasets import load_dataset
from EvaluateTestSet import EvaluateTestSet
from transformers.integrations import TensorBoardCallback
from transformers import GenerationConfig

def init_lora_model_and_tokenizer(default_model,
                             LORA_R,
                             LORA_ALPHA,
                             LORA_DROPOUT
                            ):


    """
        
    """
    model = LLaMAForCausalLM.from_pretrained(
    default_model,
    load_in_8bit=True,
    device_map="auto",
    )
    tokenizer = LLaMATokenizer.from_pretrained(
        default_model, add_eos_token=True
    )

    model = prepare_model_for_int8_training(model)

    config = LoraConfig(
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=LORA_DROPOUT,
        bias="none",
        task_type="CAUSAL_LM",
    )

    model = get_peft_model(model, config)

    tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token

    return model, tokenizer



class MyCustomCallback(TensorBoardCallback):
    #log_bleu_steps_factor = 5
    bleu_generation_max_new_tokens = 30
    bleu_fn_test_data = "temp/t2c_answers.json"
    bleu_fn_etalon = "temp/answers.json"
    log_step = 0
    
    def on_log(self, args, state, control, logs=None, **kwargs):
        super().on_log(args, state, control, logs=logs, **kwargs)
        #print("kwargs", len(kwargs), kwargs.keys())
        if self.tb_writer is not None:
            #print(state)
            #print(state.global_step)
            #print(self.log_step)
            if (self.log_step % self.log_bleu_steps_factor ==0):
                model = kwargs['model']
                tokenizer = kwargs['tokenizer']
                
                model.eval()
                assert not model.training
                generation_config = GenerationConfig(max_new_tokens = self.bleu_generation_max_new_tokens,
                                                     # min_new_tokens = 5,
                                                     temperature = 1.0
                                                    )
                print("generation_config:", generation_config)
                evaluator = EvaluateTestSet(generation_config = generation_config,
                                            fn_test_data = self.bleu_fn_test_data,
                                            fn_etalon = self.bleu_fn_etalon,
                                            batch_size = 1
                                       )

                metric_res = evaluator.evaluate(model=model, 
                                                tokenizer=tokenizer,
                                               )
                model.train()
                assert model.training
                print(metric_res)
                for key, val in metric_res.items():
                    #add "custom/something"
                    self.tb_writer.add_scalar(key, val, state.global_step)
                self.tb_writer.flush()
            self.log_step += 1


Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /opt/conda/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...


  warn(msg)
Either way, this might cause trouble in the future:
If you get `CUDA error: invalid device function` errors, the above might be the cause and the solution is to make sure only one ['libcudart.so', 'libcudart.so.11.0', 'libcudart.so.12.0'] in the paths that we search based on your env.
  warn(msg)


In [3]:
CONFIG_PATH = "/root/experiments_config/"
EXPERIMENTS_PATH = "/root/experiments/"
experiment_name = "t2c_concode_220428_v20"
# t2c_concode_220428_v18.json

In [4]:
current_config_path = os.path.join(CONFIG_PATH, experiment_name + "_config.json")
experiment_config = json.load(open(current_config_path, "r"))

assert experiment_config['experiment_name'] == experiment_name

In [5]:
experiment_config['resume_from_checkpoint'] = True

In [6]:
assert experiment_config['resume_from_checkpoint'] == True

In [7]:
assert experiment_config['experiment_name'] == experiment_name

In [8]:
current_experiment_path = os.path.join(EXPERIMENTS_PATH, experiment_name)

In [9]:
!mkdir {current_experiment_path}

mkdir: cannot create directory ‘/root/experiments/t2c_concode_220428_v20’: File exists


In [10]:
json.dump(experiment_config, open(current_experiment_path + \
                                  "/experiment_config.json", 
                                  "w+"
                                 )
         )

In [11]:
setattr(MyCustomCallback, "log_bleu_steps_factor", experiment_config['log_bleu_steps_factor'])

In [12]:
MyCustomCallback.log_bleu_steps_factor

50

In [13]:
model, tokenizer = init_lora_model_and_tokenizer(default_model = experiment_config["default_model"],
                                                 LORA_R = experiment_config["LORA_R"],
                                                 LORA_ALPHA = experiment_config["LORA_ALPHA"],
                                                 LORA_DROPOUT = experiment_config["LORA_DROPOUT"]
                                                )


data = load_dataset("json", 
                    data_files = {"train": experiment_config["fn_train_dataset"],
                                  "eval":  experiment_config["fn_eval_dataset"]
                                 }
                   )



Loading checkpoint shards:   0%|          | 0/33 [00:00<?, ?it/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.
Found cached dataset json (/root/.cache/huggingface/datasets/json/default-3ac2744fedc77f2f/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


  0%|          | 0/2 [00:00<?, ?it/s]

In [14]:
tokenizer_val = LLaMATokenizer.from_pretrained(
    experiment_config['default_model'], add_eos_token=True
)
tokenizer_val.pad_token_id = 0  # unk. we want this to be different from the eos token


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'LLaMATokenizer'. 
The class this function is called from is 'LlamaTokenizer'.


In [15]:
# experiment_config["logging_steps"] = 1
# experiment_config["eval_steps"] = 1

In [16]:
experiment_config
# {'experiment_name': 't2c_concode_220428_v19',
#  'fn_train_dataset': '/root/data/t2c_train.json',
#  'fn_eval_dataset': '/root/data/t2c_answers.json',
#  'default_model': 'decapoda-research/llama-7b-hf',
#  'MICRO_BATCH_SIZE': 2,
#  'BATCH_SIZE': 10,
#  'EPOCHS': 2,
#  'LEARNING_RATE': 0.0002,
#  'CUTOFF_LEN': 256,
#  'LORA_R': 16,
#  'LORA_ALPHA': 16,
#  'LORA_DROPOUT': 0.05,
#  'warmup_steps': 200,
#  'fp16': True,
#  'logging_steps': 10,
#  'eval_steps': 100,
#  'evaluation_strategy': 'steps',
#  'save_total_limit': 1,
#  'save_strategy': 'steps',
#  'save_steps': 500,
#  'seed': 42,
#  'logging_strategy': 'steps',
#  'report_to': 'tensorboard',
#  'mlm': False,
#  'truncation': True,
#  'padding': 'max_length',
#  'config_use_cache': False,
#  'resume_from_checkpoint': False,
#  'bleu_batch_size': 5,
#  'GRADIENT_ACCUMULATION_STEPS': 5,
#  'log_bleu_steps_factor': 50}

{'experiment_name': 't2c_concode_220428_v20',
 'fn_train_dataset': '/root/data/t2c_train.json',
 'fn_eval_dataset': '/root/data/t2c_answers.json',
 'default_model': 'decapoda-research/llama-7b-hf',
 'MICRO_BATCH_SIZE': 2,
 'BATCH_SIZE': 10,
 'EPOCHS': 2,
 'LEARNING_RATE': 0.0002,
 'CUTOFF_LEN': 256,
 'LORA_R': 64,
 'LORA_ALPHA': 16,
 'LORA_DROPOUT': 0.05,
 'warmup_steps': 200,
 'fp16': True,
 'logging_steps': 10,
 'eval_steps': 100,
 'evaluation_strategy': 'steps',
 'save_total_limit': 1,
 'save_strategy': 'steps',
 'save_steps': 500,
 'seed': 42,
 'logging_strategy': 'steps',
 'report_to': 'tensorboard',
 'mlm': False,
 'truncation': True,
 'padding': 'max_length',
 'config_use_cache': False,
 'resume_from_checkpoint': True,
 'bleu_batch_size': 5,
 'GRADIENT_ACCUMULATION_STEPS': 5,
 'log_bleu_steps_factor': 50}

In [17]:
# experiment_config["resume_from_checkpoint"]

In [18]:
from prompter import Prompter
prompter = Prompter()

def generate_prompt(data_point):
    if "input" in data_point and data_point["input"]:
        return prompter.generate_prompt(instruction = data_point["instruction"],
                                        input = data_point["input"],
                                        label = data_point["output"]
                                       )
    else:
        return prompter.generate_prompt(instruction = data_point["instruction"],
                                        #input = None,
                                        label = data_point["output"]
                                       )

/root/ipynb/prompter/templates/


In [None]:
# def generate_prompt(data_point):
#     # sorry about the formatting disaster gotta move fast
#     if data_point["input"]:
#         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
# ### Instruction:
# {data_point["instruction"]}
# ### Input:
# {data_point["input"]}
# ### Response:
# {data_point["output"]}"""
#     else:
#         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
# ### Instruction:
# {data_point["instruction"]}
# ### Response:
# {data_point["output"]}"""


data = data.shuffle().map(
    lambda data_point: tokenizer(
        generate_prompt(data_point),
        truncation=experiment_config["truncation"],
        max_length=experiment_config["CUTOFF_LEN"],
        padding=experiment_config["padding"]
    )
)

trainer = transformers.Trainer(
    model=model,
    tokenizer=tokenizer_val,
    train_dataset=data["train"],
    eval_dataset=data['eval'],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=experiment_config["MICRO_BATCH_SIZE"],
        gradient_accumulation_steps=experiment_config["GRADIENT_ACCUMULATION_STEPS"],
        warmup_steps=experiment_config["warmup_steps"],
        num_train_epochs=experiment_config["EPOCHS"],
        learning_rate=experiment_config["LEARNING_RATE"],
        fp16=experiment_config["fp16"],
        logging_steps=experiment_config["logging_steps"],        
        evaluation_strategy = experiment_config['evaluation_strategy'],
        eval_steps=experiment_config["eval_steps"],
        output_dir=current_experiment_path,#"lora-alpaca",
        save_total_limit=experiment_config["save_total_limit"],
        save_strategy = experiment_config["save_strategy"],
        
        save_steps = experiment_config["save_steps"],
        seed=experiment_config["seed"],
        logging_dir=current_experiment_path,
        logging_strategy=experiment_config["logging_strategy"],
        report_to=experiment_config["report_to"]
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, 
                                                               mlm=experiment_config["mlm"]
                                                              ),
    callbacks = [MyCustomCallback]
)
model.config.use_cache = experiment_config["config_use_cache"]
# print(len(trainer.optimizer.state['found_inf_per_device']))


trainer.train(resume_from_checkpoint=experiment_config["resume_from_checkpoint"])

model.save_pretrained(current_experiment_path)

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]



Step,Training Loss,Validation Loss
600,0.9753,1.120254
700,1.0037,1.114299
800,0.9834,1.11295
900,1.0072,1.111197
1000,0.9425,1.110582
1100,0.9821,1.106141
1200,0.8968,1.107864
1300,0.9189,1.103898
1400,0.9007,1.104232
1500,0.9215,1.102302


generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



  0%|                                                    | 0/30 [00:00<?, ?it/s]Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.52s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 152335.50it/s]


{'EM': 0.0, 'BLEU': 0.20347627636332513, 'brevity_penalty': 0.6220534596927131, 'ratio': 0.6780905752753978, 'translation_length': 554, 'reference_length': 817, 'precisions_0': 0.581981981981982, 'precisions_1': 0.40190476190476193, 'precisions_2': 0.26464646464646463, 'precisions_3': 0.18494623655913978}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.50s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 175738.99it/s]


{'EM': 1.0, 'BLEU': 0.1969144737196191, 'brevity_penalty': 0.6467131914862885, 'ratio': 0.6964504283965728, 'translation_length': 569, 'reference_length': 817, 'precisions_0': 0.5824561403508772, 'precisions_1': 0.38333333333333336, 'precisions_2': 0.24313725490196078, 'precisions_3': 0.15833333333333333}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.50s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 48545.19it/s]


{'EM': 0.0, 'BLEU': 0.1611390084801304, 'brevity_penalty': 0.5784816314703292, 'ratio': 0.6462668298653611, 'translation_length': 528, 'reference_length': 817, 'precisions_0': 0.5860113421550095, 'precisions_1': 0.3667334669338677, 'precisions_2': 0.21961620469083157, 'precisions_3': 0.1275626423690205}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:44<00:00,  3.49s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 161942.24it/s]


{'EM': 0.0, 'BLEU': 0.16875967267059536, 'brevity_penalty': 0.5682839349897214, 'ratio': 0.6389228886168911, 'translation_length': 522, 'reference_length': 817, 'precisions_0': 0.5774378585086042, 'precisions_1': 0.385395537525355, 'precisions_2': 0.24406047516198703, 'precisions_3': 0.14318706697459585}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:41<00:00,  3.39s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 160496.33it/s]


{'EM': 0.0, 'BLEU': 0.17409849496466578, 'brevity_penalty': 0.57678561653526, 'ratio': 0.6450428396572827, 'translation_length': 527, 'reference_length': 817, 'precisions_0': 0.6060606060606061, 'precisions_1': 0.37751004016064255, 'precisions_2': 0.23717948717948717, 'precisions_3': 0.15296803652968036}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:41<00:00,  3.38s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 164482.51it/s]


{'EM': 0.0, 'BLEU': 0.22555922519628963, 'brevity_penalty': 0.6187385540544988, 'ratio': 0.6756425948592412, 'translation_length': 552, 'reference_length': 817, 'precisions_0': 0.6365280289330922, 'precisions_1': 0.4474187380497132, 'precisions_2': 0.3022312373225152, 'precisions_3': 0.20518358531317496}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [02:02<00:00,  4.07s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 163414.44it/s]


{'EM': 0.0, 'BLEU': 0.20693088754970798, 'brevity_penalty': 0.640172441475687, 'ratio': 0.6915544675642595, 'translation_length': 565, 'reference_length': 817, 'precisions_0': 0.5901060070671378, 'precisions_1': 0.39552238805970147, 'precisions_2': 0.25889328063241107, 'precisions_3': 0.18067226890756302}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.52s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 161734.09it/s]


{'EM': 0.0, 'BLEU': 0.22708627247520077, 'brevity_penalty': 0.687000076489837, 'ratio': 0.7270501835985312, 'translation_length': 594, 'reference_length': 817, 'precisions_0': 0.5915966386554622, 'precisions_1': 0.4, 'precisions_2': 0.27102803738317754, 'precisions_3': 0.18613861386138614}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.54s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 162569.92it/s]


{'EM': 0.0, 'BLEU': 0.22367026231682596, 'brevity_penalty': 0.6854084757090199, 'ratio': 0.7258261933904528, 'translation_length': 593, 'reference_length': 817, 'precisions_0': 0.5892255892255892, 'precisions_1': 0.4024822695035461, 'precisions_2': 0.26779026217228463, 'precisions_3': 0.17857142857142858}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:46<00:00,  3.54s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 164913.66it/s]

{'EM': 0.0, 'BLEU': 0.2130986293039118, 'brevity_penalty': 0.6483443410015097, 'ratio': 0.6976744186046512, 'translation_length': 570, 'reference_length': 817, 'precisions_0': 0.5796847635726795, 'precisions_1': 0.3955637707948244, 'precisions_2': 0.2720156555772994, 'precisions_3': 0.18711018711018712}





generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:42<00:00,  3.42s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 51781.53it/s]


{'EM': 0.0, 'BLEU': 0.21851380937922585, 'brevity_penalty': 0.6758238404571268, 'ratio': 0.7184822521419829, 'translation_length': 587, 'reference_length': 817, 'precisions_0': 0.5884353741496599, 'precisions_1': 0.3888888888888889, 'precisions_2': 0.26136363636363635, 'precisions_3': 0.1827309236947791}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:45<00:00,  3.51s/it]
100%|████████████████████████████████████████| 30/30 [00:00<00:00, 54708.31it/s]


{'EM': 0.0, 'BLEU': 0.2209089188776829, 'brevity_penalty': 0.6901782584838726, 'ratio': 0.7294981640146879, 'translation_length': 596, 'reference_length': 817, 'precisions_0': 0.592964824120603, 'precisions_1': 0.3880070546737213, 'precisions_2': 0.2569832402234637, 'precisions_3': 0.17751479289940827}
generation_config: GenerationConfig {
  "max_new_tokens": 30,
  "transformers_version": "4.28.1"
}



100%|███████████████████████████████████████████| 30/30 [01:43<00:00,  3.45s/it]
100%|███████████████████████████████████████| 30/30 [00:00<00:00, 163626.94it/s]


{'EM': 0.0, 'BLEU': 0.22375966226223992, 'brevity_penalty': 0.6629514358413311, 'ratio': 0.7086903304773562, 'translation_length': 579, 'reference_length': 817, 'precisions_0': 0.6189655172413793, 'precisions_1': 0.4163636363636364, 'precisions_2': 0.27115384615384613, 'precisions_3': 0.18571428571428572}


In [None]:
model.save_pretrained(current_experiment_path)

In [None]:
# !rm -rf {current_experiment_path}

In [21]:
!df -h .

The history saving thread hit an unexpected error (OperationalError('unable to open database file')).History will not be written to the database.
Filesystem      Size  Used Avail Use% Mounted on
overlay          45G   45G   84K 100% /


In [22]:
!ls -lah {current_experiment_path}

total 56K
drwxr-xr-x  6 root root  250 May 10 23:47 .
drwxr-xr-x 13 root root 4.0K May 10 22:36 ..
drwxr-xr-x  2 root root   65 May 10 22:38 1683758323.605443
drwxr-xr-x  2 root root   65 May 10 22:38 1683758323.6125872
drwxr-xr-x  2 root root   31 May 10 23:47 checkpoint-1000
drwxr-xr-x  2 root root  243 May 10 23:14 checkpoint-500
-rw-r--r--  1 root root  22K May 10 23:47 events.out.tfevents.1683758323.8d048d63ed1a.14655.0
-rw-r--r--  1 root root  24K May 10 23:47 events.out.tfevents.1683758323.8d048d63ed1a.14655.2
-rw-r--r--  1 root root  794 May 10 22:36 experiment_config.json


In [23]:
current_experiment_path

'/root/experiments/t2c_concode_220428_v20'

In [28]:
!ls -lah /root/experiments/t2c_concode_220428_v20/checkpoint-500

total 7.0G
drwxr-xr-x 2 root root  243 May 10 23:14 .
drwxr-xr-x 6 root root  250 May 10 23:47 ..
-rw-r--r-- 1 root root 257M May 10 23:14 optimizer.pt
-rw-r--r-- 1 root root 6.7G May 10 23:14 pytorch_model.bin
-rw-r--r-- 1 root root  15K May 10 23:14 rng_state.pth
-rw-r--r-- 1 root root  557 May 10 23:14 scaler.pt
-rw-r--r-- 1 root root  627 May 10 23:14 scheduler.pt
-rw-r--r-- 1 root root  423 May 10 23:14 special_tokens_map.json
-rw-r--r-- 1 root root 489K May 10 23:14 tokenizer.model
-rw-r--r-- 1 root root  714 May 10 23:14 tokenizer_config.json
-rw-r--r-- 1 root root 7.1K May 10 23:14 trainer_state.json
-rw-r--r-- 1 root root 3.5K May 10 23:14 training_args.bin


In [44]:
!ls -lah /root/experiments/t2c_concode_220428_v20/checkpoint-500

total 7.0G
drwxr-xr-x 2 root root  243 May 10 23:14 .
drwxr-xr-x 5 root root  227 May 11 07:54 ..
-rw-r--r-- 1 root root 257M May 10 23:14 optimizer.pt
-rw-r--r-- 1 root root 6.7G May 10 23:14 pytorch_model.bin
-rw-r--r-- 1 root root  15K May 10 23:14 rng_state.pth
-rw-r--r-- 1 root root  557 May 10 23:14 scaler.pt
-rw-r--r-- 1 root root  627 May 10 23:14 scheduler.pt
-rw-r--r-- 1 root root  423 May 10 23:14 special_tokens_map.json
-rw-r--r-- 1 root root 489K May 10 23:14 tokenizer.model
-rw-r--r-- 1 root root  714 May 10 23:14 tokenizer_config.json
-rw-r--r-- 1 root root 7.1K May 10 23:14 trainer_state.json
-rw-r--r-- 1 root root 3.5K May 10 23:14 training_args.bin


In [58]:
# !rm -rf /root/experiments/t2c_concode_220428_v19/checkpoint-20000/
# !df -h .

Filesystem      Size  Used Avail Use% Mounted on
overlay          45G   32G   14G  71% /


In [None]:
# !rm -rf {current_experiment_path}/checkpoint-1000

In [None]:
print(123)

In [None]:
# import sys
# sys.exit()

In [None]:
0