# Load Libraries and Define Run Configuration

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

from autotrain.params import LLMTrainingParams
from autotrain.project import AutoTrainProject

# --- Run configuration -------------------------------------------------------
# These constants are consumed by the cell that builds the training parameters
# and the AutoTrain project.

# Seed handed to the training parameters.
RANDOM_SEED = 42

# Backend identifier handed to AutoTrainProject.
BACKEND = "spaces-cpu-basic"

# Base model identifier used as the `model` training parameter.
HUGGINGFACE_BASE_MODEL = "meta-llama/Llama-3.2-3B"

# Dataset identifier used as `data_path`. The author listed two options:
#   iamraymondlow/sharegpt_censored, iamraymondlow/sharegpt_uncensored
DATA = "iamraymondlow/sharegpt_censored"

# Project name for the run. The author noted
# "llama-3.2-3B-instructiontuned-censored" as the other candidate name.
PROJECT_NAME = "test-project"


# Create Hugging Face Fine-Tuning Project

In [None]:
# Assemble the fine-tuning configuration. Every argument is passed by keyword,
# so they are grouped by topic below rather than kept in any particular order.
params = LLMTrainingParams(
    # What to train and how the run is identified.
    model=HUGGINGFACE_BASE_MODEL,
    project_name=PROJECT_NAME,
    trainer="sft",
    seed=RANDOM_SEED,
    log="tensorboard",
    # Dataset location and layout.
    data_path=DATA,
    train_split="train",
    text_column="text",
    chat_template="tokenizer",
    # Sequence-length limits and padding side.
    model_max_length=128000,
    max_prompt_length=64000,
    max_completion_length=64000,
    padding="right",
    # Optimization settings.
    epochs=1,  # 100 is the other value the author noted here
    lr=1e-5,
    optimizer="adamw_torch",
    scheduler="cosine",
    batch_size=4,
    gradient_accumulation=4,
    auto_find_batch_size=True,
    mixed_precision="none",
    # Adapter / quantization settings.
    peft=True,
    target_modules="all-linear",
    merge_adapter=True,
    quantization="none",
    # Hub upload; credentials are read from the environment and are None
    # when the corresponding variable is not set.
    push_to_hub=True,
    username=os.getenv("HF_USERNAME"),
    token=os.getenv("HF_TOKEN"),
)

# Build the project on the configured backend and create it.
project = AutoTrainProject(
    params=params,
    backend=BACKEND,
    process=True,
)
project.create()