In [1]:
# running on gpu
import torch

# Always bind `device`, falling back to the CPU when CUDA is not available,
# so later cells never hit an undefined name on a GPU-less machine.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Display the GPU properties as the cell output. The query is guarded because
# it is only meaningful when a CUDA device was selected; on CPU the cell
# evaluates to None and shows nothing.
torch.cuda.get_device_properties(0) if device.type == "cuda" else None

_CudaDeviceProperties(name='NVIDIA GeForce RTX 2070 SUPER', major=7, minor=5, total_memory=8191MB, multi_processor_count=40)

In [7]:
from datasets import load_dataset

# Hub repository ids of the two fine-tuning datasets.
# NOTE(review): the training cells later rebind `sleep_dataset` / `car_dataset`
# to the loaded datasets, so these names do not stay strings for the whole run.
sleep_dataset, car_dataset = (
    "thinkersloop/sleep-dataset-llm",
    "thinkersloop/car-dataset-llm",
)

# Load both datasets, sleep first and car second.
hf_sleep_dataset, hf_car_dataset = (
    load_dataset(repo_id) for repo_id in (sleep_dataset, car_dataset)
)

In [3]:
import os
from datasets import load_dataset
from unsloth import FastLanguageModel, is_bfloat16_supported
from trl import SFTTrainer
from transformers import TrainingArguments

# Notebook-wide configuration.
MODEL_ID = "unsloth/mistral-7b-v0.3-bnb-4bit"  # base checkpoint to fine-tune
MAX_SEQ_LENGTH = 512  # passed to both the model loader and the trainer
# NOTE(review): NUM_EPOCHS is not referenced by any code visible in this notebook.
NUM_EPOCHS = 3
MODELS_DIR = "../models"  # root folder for trainer outputs and saved models

# Create the output root up front; a pre-existing directory is fine.
os.makedirs(MODELS_DIR, exist_ok=True)

# Function to load and prepare dataset
def prepare_dataset(dataset_name):
    """Load a dataset by name.

    Thin wrapper around ``datasets.load_dataset``; no further preparation is
    applied to the result.

    Args:
        dataset_name: Identifier passed straight to ``load_dataset``.

    Returns:
        Whatever ``load_dataset(dataset_name)`` returns.
    """
    return load_dataset(dataset_name)

# Function to initialize model
def initialize_model():
    """Load the base checkpoint in 4-bit and attach LoRA adapters to it.

    The checkpoint is ``MODEL_ID`` and the context length is ``MAX_SEQ_LENGTH``.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is the output of
        ``FastLanguageModel.get_peft_model`` and ``tokenizer`` comes from
        ``FastLanguageModel.from_pretrained``.
    """
    # Layers that receive LoRA adapters: attention projections, then MLP projections.
    attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
    mlp_projections = ["gate_proj", "up_proj", "down_proj"]

    base_model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_ID,
        max_seq_length=MAX_SEQ_LENGTH,
        # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
        dtype=None,
        load_in_4bit=True,
    )

    lora_settings = dict(
        r=16,
        target_modules=attention_projections + mlp_projections,
        lora_alpha=16,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,  # fixed so repeated runs use the same adapter seed
        use_rslora=False,
        loftq_config=None,
    )
    peft_model = FastLanguageModel.get_peft_model(base_model, **lora_settings)

    return peft_model, tokenizer

# Function to train model
def train_model(model, tokenizer, dataset, output_dir, max_steps=60):
    """Fine-tune ``model`` with TRL's ``SFTTrainer`` and return the trainer.

    Args:
        model: Model to train, e.g. the one returned by ``initialize_model``.
        tokenizer: Tokenizer handed to the trainer.
        dataset: Mapping with ``"train"`` and ``"test"`` splits. The
            ``"messages"`` field of each example is used as the training text.
        output_dir: Directory passed to ``TrainingArguments(output_dir=...)``.
        max_steps: Number of training steps. Defaults to 60, the value that
            used to be hard-coded. Because ``max_steps`` is set, the trainer
            log reports that it overrides ``num_train_epochs``, so the
            notebook's ``NUM_EPOCHS`` constant does not bound the run.

    Returns:
        The ``SFTTrainer`` instance after ``train()`` has completed.
    """
    # Query hardware support once; fp16 and bf16 must be mutually exclusive.
    use_bf16 = is_bfloat16_supported()

    training_args = TrainingArguments(
        per_device_train_batch_size=1,  # one example per optimizer step
        gradient_accumulation_steps=1,  # no accumulation: update after every batch
        warmup_steps=5,
        max_steps=max_steps,
        learning_rate=2e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=1,  # log the loss at every step
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir=output_dir,
    )

    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset["train"],
        # NOTE(review): no evaluation schedule is configured in the arguments
        # above — confirm whether the eval split is actually used during training.
        eval_dataset=dataset["test"],
        dataset_text_field="messages",
        max_seq_length=MAX_SEQ_LENGTH,
        dataset_num_proc=2,
        packing=False,  # Can make training 5x faster for short sequences.
        args=training_args,
    )

    trainer.train()
    return trainer

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [6]:
# Baseline GPU numbers, in GiB (bytes / 2**30), recorded so a later cell can
# compare them with the peak reserved memory.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 2**30, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 2**30, 3)
print(
    f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.",
    f"{start_gpu_memory} GB of memory reserved.",
    sep="\n",
)

GPU = NVIDIA GeForce RTX 2070 SUPER. Max memory = 8.0 GB.
0.0 GB of memory reserved.


In [5]:
# Train sleep model
print("Training sleep model...")

# Trainer output goes to one folder, the final model + tokenizer to another.
sleep_output_dir = os.path.join(MODELS_DIR, "sleep_model")
sleep_final_dir = os.path.join(MODELS_DIR, "sleep_model_final")

sleep_dataset = prepare_dataset("thinkersloop/sleep-dataset-llm")
sleep_model, sleep_tokenizer = initialize_model()
sleep_trainer = train_model(sleep_model, sleep_tokenizer, sleep_dataset, sleep_output_dir)

# Model and tokenizer are saved side by side in the same folder.
# NOTE(review): sleep_model and sleep_trainer stay referenced after this cell,
# so their GPU memory is still held if another model is loaded in the same
# kernel — confirm this fits on the target GPU.
sleep_trainer.save_model(sleep_final_dir)
sleep_tokenizer.save_pretrained(sleep_final_dir)

print("Sleep model training completed.")


Training sleep model...
==((====))==  Unsloth: Fast Mistral patching release 2024.7
   \\   /|    GPU: NVIDIA GeForce RTX 2070 SUPER. Max memory: 8.0 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth: Will load unsloth/mistral-7b-v0.3-bnb-4bit as a legacy tokenizer.
Unsloth 2024.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.
max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 24 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 1
\        /    Total batch size = 1 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,1.1317
2,0.8403
3,0.9772
4,1.1925
5,1.5253
6,1.3154
7,1.179
8,0.8106
9,0.8913
10,0.8321


Sleep model training completed.


In [4]:
# Train car model
print("Training car model...")

# Trainer output goes to one folder, the final model + tokenizer to another.
car_output_dir = os.path.join(MODELS_DIR, "car_model")
car_final_dir = os.path.join(MODELS_DIR, "car_model_final")

car_dataset = prepare_dataset("thinkersloop/car-dataset-llm")
car_model, car_tokenizer = initialize_model()
car_trainer = train_model(car_model, car_tokenizer, car_dataset, car_output_dir)

# Model and tokenizer are saved side by side in the same folder.
car_trainer.save_model(car_final_dir)
car_tokenizer.save_pretrained(car_final_dir)

print("Car model training completed.")


Training car model...
==((====))==  Unsloth: Fast Mistral patching release 2024.7
   \\   /|    GPU: NVIDIA GeForce RTX 2070 SUPER. Max memory: 8.0 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.26.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth: Will load unsloth/mistral-7b-v0.3-bnb-4bit as a legacy tokenizer.
Unsloth 2024.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.
Map (num_proc=2): 100%|██████████| 23/23 [00:00<00:00, 120.57 examples/s]
Map (num_proc=2): 100%|██████████| 3/3 [00:00<00:00, 17.52 examples/s]
max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 23 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 1 | Gradient Accumulation steps = 1
\        /    Total batch size = 1 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,0.9419
2,1.0452
3,1.3824
4,1.0013
5,1.4163
6,1.3747
7,1.0485
8,1.2589
9,1.1569
10,1.3044


Car model training completed.


In [4]:
from huggingface_hub import HfApi, HfFolder
from dotenv import load_dotenv

# Populate the process environment from a .env file, then read the Hub token.
# hf_token is None when HF_TOKEN is not set.
load_dotenv()
hf_token = os.environ.get("HF_TOKEN")

# Function to push model to Hugging Face Hub
def push_to_hub(model_dir, repo_name, token=None, private=False):
    """Upload a local model folder to a model repository on the Hugging Face Hub.

    Args:
        model_dir: Path of the local folder to upload.
        repo_name: Target repository id, e.g. ``"thinkersloop/sleep-llm-model"``.
        token: Hub access token. When ``None`` (the default) the module-level
            ``hf_token`` is used, as before.
        private: Forwarded to ``create_repo``. The default ``False`` keeps the
            previous behaviour of creating the repository as non-private.

    Raises:
        ValueError: If ``model_dir`` is not an existing directory.
    """
    # Validate the local folder before touching the Hub, so a mistyped path
    # cannot leave a freshly created, empty repository behind.
    if not os.path.isdir(model_dir):
        raise ValueError(f"model_dir is not an existing directory: {model_dir!r}")

    auth_token = hf_token if token is None else token

    api = HfApi()
    # Create the repository if it doesn't exist
    api.create_repo(
        repo_id=repo_name,
        token=auth_token,
        repo_type="model",
        private=private,
        exist_ok=True,
    )
    api.upload_folder(
        folder_path=model_dir,
        repo_id=repo_name,
        token=auth_token,
        repo_type="model",
    )

# Publish each saved model folder to its Hub repository, sleep model first.
for final_folder, hub_repo in (
    ("sleep_model_final", "thinkersloop/sleep-llm-model"),
    ("car_model_final", "thinkersloop/car-llm-model"),
):
    push_to_hub(os.path.join(MODELS_DIR, final_folder), hub_repo)

print("Training and uploading completed. Models are available on Hugging Face Hub.")

No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


Training and uploading completed. Models are available on Hugging Face Hub.


In [None]:
# Peak reserved GPU memory in GiB (bytes / 2**30), and the part of it above
# the baseline stored in start_gpu_memory.
used_memory = round(torch.cuda.max_memory_reserved() / 2**30, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)

# Both figures again, as a percentage of the total memory in max_memory.
used_percentage, lora_percentage = (
    round(gigabytes / max_memory * 100, 3)
    for gigabytes in (used_memory, used_memory_for_lora)
)

print(
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
    sep="\n",
)