# In [None]:
import json
import math
import os
import shutil
import sys
import threading
import time
import traceback
from datetime import datetime
from pathlib import Path

import torch
import transformers
from datasets import Dataset, load_dataset
from peft import (
    LoraConfig,
    get_peft_model,
    prepare_model_for_int8_training,
    set_peft_model_state_dict
)
from peft.utils.other import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING as model_to_lora_modules

# User-editable training settings. do_train() writes the value of every name
# listed in PARAMETERS to training_parameters.json, so a setting added here
# must also be added to that list.
lora_name = "example_model"  # adapter folder name; do_train() resolves it under shared.args.lora_dir via clean_path()
always_override = False  # when False, do_train() loads an existing adapter_model.bin before training
save_steps = 2000
micro_batch_size = 4  # passed to the trainer as per_device_train_batch_size
batch_size = 128
epochs = 3  # passed to the trainer as num_train_epochs
learning_rate = "3e-4"  # NOTE(review): stored as a string, not a float
lr_scheduler_type = "linear"  # passed through to TrainingArguments unchanged
lora_rank = 32  # LoraConfig r
lora_alpha = 64  # LoraConfig lora_alpha
lora_dropout = 0.05  # LoraConfig lora_dropout
cutoff_len = 256  # tokenize() left-pads every example to this many tokens
dataset = "example_dataset"  # not read by any function visible in this file
eval_dataset = None  # not read by any function visible in this file
format = "example_format"  # NOTE(review): shadows the built-in format(); the name is part of PARAMETERS
eval_steps = 1000  # divided by the gradient-accumulation step count (rounded up) when eval data exists
raw_text_file = None  # not read by any function visible in this file
overlap_len = 128  # not read by any function visible in this file
newline_favor_len = 128  # not read by any function visible in this file
higher_rank_limit = False  # not read by any function visible in this file
warmup_steps = 100  # divided by the gradient-accumulation step count (rounded up) before use
optimizer = "adamw_torch"  # passed to the trainer as optim
hard_cut_string = "\n\n\n"  # not read by any function visible in this file
train_only_after = ""  # when non-empty and found in a prompt, tokenize() labels the text up to and including it with -100
stop_at_loss = 0.0  # in the visible code this only selects logging_steps (2 when > 0, otherwise 5)
add_eos_token = False  # not read by any function visible in this file
min_chars = 0  # not read by any function visible in this file
report_to = "None"  # the string "None" (not the None object) is the value do_train() tests for

# Other global variables
# Names of the settings above that do_train() serialises to training_parameters.json.
PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate",
              "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset",
              "format", "eval_steps", "raw_text_file", "overlap_len", "newline_favor_len", "higher_rank_limit",
              "warmup_steps", "optimizer", "hard_cut_string", "train_only_after", "stop_at_loss", "add_eos_token",
              "min_chars", "report_to"]
# Polled by do_train() while the training thread is alive; nothing visible in this file sets it to True.
WANT_INTERRUPT = False

def clean_path(base_path: str, path: str):
    """Normalise ``path`` and optionally anchor it under ``base_path``.

    Backslashes become forward slashes and every ``..`` sequence is replaced
    by ``_``. When ``base_path`` is ``None`` the normalised path is returned
    on its own; otherwise it is appended to the absolute form of
    ``base_path`` with a ``/`` separator.
    """
    sanitized = path.replace("\\", "/").replace("..", "_")
    if base_path is not None:
        root = Path(base_path).absolute()
        return f"{root}/{sanitized}"
    return sanitized

def backup_adapter(input_folder):
    """Copy the files of ``input_folder`` into a dated ``Backup-YYYY-MM-DD`` subfolder.

    Nothing happens unless ``input_folder`` contains ``adapter_model.bin``.
    The subfolder name comes from that file's ``st_ctime``. If the subfolder
    already holds an ``adapter_model.bin`` the copy is skipped. Only regular
    files directly inside ``input_folder`` are copied. Any exception is
    reported with ``print`` and not re-raised, so the backup is best-effort.
    """
    try:
        current_adapter = Path(f"{input_folder}/adapter_model.bin")
        if not current_adapter.is_file():
            return

        print("Backing up existing LoRA adapter...")
        stamp = datetime.fromtimestamp(current_adapter.stat().st_ctime)
        folder_name = stamp.strftime("Backup-%Y-%m-%d")

        # Make sure the destination exists before checking for a previous backup.
        backup_dir = Path(f"{input_folder}/{folder_name}")
        backup_dir.mkdir(parents=True, exist_ok=True)

        if Path(f"{input_folder}/{folder_name}/adapter_model.bin").is_file():
            print(" - Backup already exists. Skipping backup process.")
            return

        # Directories (including the backup folder itself) are left alone.
        for entry in Path(input_folder).iterdir():
            if entry.is_file():
                shutil.copy2(entry, backup_dir)
    except Exception as e:
        print("An error occurred in backup_adapter:", str(e))

def calc_trainable_parameters(model):
    """Count the parameters of ``model``.

    Iterates ``model.named_parameters()`` and sums ``numel()`` of each
    parameter. When ``numel()`` is 0 and the parameter exposes ``ds_numel``,
    that attribute is used instead (presumably for DeepSpeed-partitioned
    parameters; confirm).

    Returns:
        A ``(trainable, total)`` tuple, where ``trainable`` only includes
        parameters whose ``requires_grad`` is truthy.
    """
    total = 0
    trainable = 0
    for _name, tensor in model.named_parameters():
        count = tensor.numel()
        if count == 0 and hasattr(tensor, "ds_numel"):
            count = tensor.ds_numel
        total += count
        trainable += count if tensor.requires_grad else 0
    return trainable, total

def split_chunks(arr, size, step):
    """Yield windows of at most ``size`` items from ``arr``, advancing ``step`` items each time.

    Windows start at offsets ``0, step, 2*step, ...`` below ``len(arr)``;
    windows near the end may be shorter than ``size``.
    """
    end = len(arr)
    for offset in range(0, end, step):
        window = arr[offset:offset + size]
        yield window

def cut_chunk_for_newline(chunk: str, max_length: int):
    """Trim ``chunk`` to newline boundaries that lie close to its edges.

    If the first newline occurs before index ``max_length``, everything up to
    and including it is removed. Then, if the last newline of what remains is
    fewer than ``max_length`` characters from the end, it and everything
    after it is removed. A chunk without newlines is returned unchanged.
    """
    head = chunk.find('\n')
    if head == -1:
        return chunk
    if head < max_length:
        chunk = chunk[head + 1:]

    tail = chunk.rfind('\n')
    if tail != -1 and len(chunk) - tail < max_length:
        chunk = chunk[:tail]
    return chunk

def tokenize(prompt, append_eos_token=False):
    """Encode ``prompt`` into a left-padded training example.

    Relies on ``encode`` and ``shared.tokenizer``, which are not defined in
    this file, and on the module-level ``train_only_after`` and
    ``cutoff_len`` settings.

    When ``train_only_after`` is empty or absent from ``prompt``, the whole
    prompt is encoded and every position (padding included) gets label 1.
    Otherwise the prompt is split right after the first occurrence of
    ``train_only_after``: positions up to that point (padding included) get
    label -100 and the remaining positions get label 1.

    Args:
        prompt: Text to encode.
        append_eos_token: Append the tokenizer's EOS id when the encoded
            text does not already end with it.

    Returns:
        A dict with ``input_ids`` (tensor), ``labels`` (list of ints) and
        ``attention_mask`` (tensor that is False at pad-token positions).
    """
    eos_id = shared.tokenizer.eos_token_id
    pad_id = shared.tokenizer.pad_token_id

    if train_only_after == '' or train_only_after not in prompt:
        input_ids = encode(prompt, True)
        # `not input_ids` guards the [-1] lookup when the encoder returns nothing.
        if append_eos_token and len(input_ids) < cutoff_len and (not input_ids or input_ids[-1] != eos_id):
            input_ids.append(eos_id)
        # Left-pad to cutoff_len; a non-positive pad count adds nothing.
        # NOTE(review): nothing here truncates an over-long encoding; presumably encode() does, confirm.
        input_ids = [pad_id] * (cutoff_len - len(input_ids)) + input_ids
        labels = [1] * len(input_ids)
    else:
        ind = prompt.index(train_only_after) + len(train_only_after)
        before_tokens = encode(prompt[:ind], True)
        after_tokens = encode(prompt[ind:], False)
        # The text after the marker may be empty, so check before indexing [-1].
        if append_eos_token and (not after_tokens or after_tokens[-1] != eos_id):
            after_tokens.append(eos_id)

        full_length = len(after_tokens) + len(before_tokens)
        if full_length > cutoff_len:
            # Clamp at 0: a negative bound would slice relative to the end of the
            # list and keep the wrong tokens when the prefix alone exceeds cutoff_len.
            after_tokens = after_tokens[:max(cutoff_len - len(before_tokens), 0)]
        else:
            before_tokens = [pad_id] * (cutoff_len - full_length) + before_tokens

        input_ids = before_tokens + after_tokens
        labels = [-100] * len(before_tokens) + [1] * len(after_tokens)

    input_ids = torch.tensor(input_ids)
    return {
        "input_ids": input_ids,
        "labels": labels,
        "attention_mask": input_ids.ne(pad_id),
    }

def do_train():
    """Train a LoRA adapter on ``shared.model`` and return a status string.

    Reads the module-level settings (``lora_name``, ``batch_size``, ...) and
    several names that this file does not define: ``shared``, ``tracked``,
    ``train_data``, ``eval_data`` and ``model_id``. They must be provided by
    the surrounding project or notebook before this function is called.

    The training loop runs in a background thread while this function polls
    it every 0.5 s and prints a progress line whenever
    ``tracked.current_steps`` changes.

    Returns:
        ``"Training complete!"`` on success, ``"Training interrupted"`` when
        ``WANT_INTERRUPT`` is set, or the formatted traceback text when
        creating or loading the adapter fails.
    """
    if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'):
        prepare_model_for_int8_training(shared.model)

    lora_file_path = clean_path(shared.args.lora_dir, lora_name)

    # One optimizer step covers batch_size samples made of micro-batches;
    # never use fewer than one accumulation step (also avoids dividing by 0 below).
    gradient_accumulation_steps = max(1, batch_size // micro_batch_size)
    # learning_rate is kept as a string in the settings (e.g. "3e-4").
    actual_lr = float(learning_rate)

    # NOTE(review): model_id is not defined in this file; confirm where it comes from.
    config = LoraConfig(
        r=lora_rank,
        lora_alpha=lora_alpha,
        target_modules=model_to_lora_modules[model_id],
        lora_dropout=lora_dropout,
        bias="none",
        task_type="CAUSAL_LM"
    )

    # NOTE(review): placeholder. Replace 3 with the loaded Hugging Face model;
    # as written, get_peft_model() below receives an int and fails.
    shared.model = 3 # transformer model from huggingface

    try:
        lora_model = get_peft_model(shared.model, config)

        if not always_override and Path(f"{lora_file_path}/adapter_model.bin").is_file():
            # torch.load unpickles the file: only resume from adapters you trust.
            state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin")
            set_peft_model_state_dict(lora_model, state_dict_peft)

    except Exception:
        return traceback.format_exc()

    lora_model.config.use_cache = False

    if torch.__version__ >= "2" and sys.platform != "win32":
        lora_model = torch.compile(lora_model)

    # The settings live at module level, so they are looked up in globals();
    # this function's locals() does not contain them.
    settings = globals()
    Path(lora_file_path).mkdir(parents=True, exist_ok=True)
    with open(f"{lora_file_path}/training_parameters.json", "w", encoding="utf-8") as file:
        json.dump({name: settings[name] for name in PARAMETERS}, file, indent=2)

    def threaded_run():
        """Build the Trainer and run it; executed on the background thread."""
        trainer = transformers.Trainer(
            model=lora_model,
            train_dataset=train_data,
            eval_dataset=eval_data,
            args=transformers.TrainingArguments(
                # "none" disables reporting; passing None would select every installed integration.
                report_to=report_to if report_to != "None" else "none",
                per_device_train_batch_size=micro_batch_size,
                gradient_accumulation_steps=gradient_accumulation_steps,
                warmup_steps=math.ceil(warmup_steps / gradient_accumulation_steps),
                num_train_epochs=epochs,
                learning_rate=actual_lr,
                fp16=not shared.args.cpu,
                optim=optimizer,
                logging_steps=2 if stop_at_loss > 0 else 5,
                evaluation_strategy="steps" if eval_data is not None else "no",
                eval_steps=math.ceil(eval_steps / gradient_accumulation_steps) if eval_data is not None else None,
                save_strategy="steps" if eval_data is not None else "no",
                output_dir=lora_file_path,
                lr_scheduler_type=lr_scheduler_type,
                load_best_model_at_end=eval_data is not None,
                ddp_find_unused_parameters=None,
                no_cuda=shared.args.cpu,
            ),
            data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
        )
        trainer.train()

    thread = threading.Thread(target=threaded_run)
    thread.start()
    last_step = 0
    start_time = time.perf_counter()

    while thread.is_alive():
        time.sleep(0.5)
        if WANT_INTERRUPT:
            # NOTE(review): nothing visible here stops the trainer thread itself.
            return "Training interrupted"

        current_step = tracked.current_steps
        if current_step != last_step:
            last_step = current_step
            time_elapsed = time.perf_counter() - start_time
            if time_elapsed <= 0 or current_step <= 0:
                # No meaningful rate can be computed yet.
                timer_info = ""
                total_time_estimate = 999
            else:
                its = current_step / time_elapsed
                if its > 1:
                    timer_info = f"`{its:.2f}` it/s"
                else:
                    timer_info = f"`{1.0/its:.2f}` s/it"

                total_time_estimate = (1.0 / its) * (tracked.max_steps)

            # Report progress and keep waiting: returning here would abandon the
            # training thread and skip the final save below.
            print(f"Running... {timer_info}, {format_time(time_elapsed)} / {format_time(total_time_estimate)} ... {format_time(total_time_estimate - time_elapsed)} remaining")

    if not tracked.did_save:
        lora_model.save_pretrained(lora_file_path)

    if WANT_INTERRUPT:
        return "Training interrupted"

    return "Training complete!"

def format_time(seconds: float):
    """Render a duration as a short Markdown-style string.

    Values under 120 are shown in seconds; otherwise the value is divided by
    60 and shown in minutes if that is under 120, else divided by 60 again
    and shown in hours. The number is rounded to zero decimals and wrapped
    in backticks.
    """
    amount = seconds
    for unit in ("seconds", "minutes"):
        if amount < 120:
            return f"`{amount:.0f}` {unit}"
        amount = amount / 60
    return f"`{amount:.0f}` hours"

# Start training as soon as this cell/script runs and print the status string
# that do_train() returns.
result = do_train()
print(result)