In [None]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps "xformers==0.0.27" trl peft accelerate bitsandbytes

* We support Llama, Mistral, Phi-3, Gemma, Yi, DeepSeek, Qwen, TinyLlama, Vicuna, Open Hermes etc
* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.
* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.
* [**NEW**] We make Gemma-2 9b / 27b **2x faster**! See our [Gemma-2 9b notebook](https://colab.research.google.com/drive/1vIrqH5uYDQwsJ4-OO3DErvuv4pBgVwk4?usp=sharing)
* [**NEW**] To finetune and auto export to Ollama, try our [Ollama notebook](https://colab.research.google.com/drive/1WZDi7APtQ9VsvOrQSSC5DDtxq159j8iZ?usp=sharing)
* [**NEW**] We make Mistral NeMo 12B 2x faster and fit in under 12GB of VRAM! [Mistral NeMo notebook](https://colab.research.google.com/drive/17d3U-CAIwzmbDRqbZ9NnpHxCkmXB6LZ0?usp=sharing)

In [None]:
!pip install "unsloth[cu121-torch230] @ git+https://github.com/unslothai/unsloth.git"

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[cu121-torch230]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-apphdn0w/unsloth_aad60ed7eb2e4e8b9970e5bd073e6671
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-apphdn0w/unsloth_aad60ed7eb2e4e8b9970e5bd073e6671
  Resolved https://github.com/unslothai/unsloth.git to commit d0ca3497eb5911483339be025e9924cf73280178
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting xformers@ https://download.pytorch.org/whl/cu121/xformers-0.0.27-cp310-cp310-manylinux2014_x86_64.whl (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[cu121-torch230]@ git+https://github.com/unslothai/unsloth.git)
  Downloading https://download.pytorch.org/whl/cu121/xf

In [None]:
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
    "unsloth/tinyllama-bnb-4bit",
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
] # More models at https://huggingface.co/unsloth

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Phi-3-medium-4k-instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--unsloth--phi-3-medium-4k-instruct-bnb-4bit/snapshots/728d4af382a25baa9da3a5f179d3ff2cf98ddc3e/config.json
Model config MistralConfig {
  "_name_or_path": "unsloth/phi-3-medium-4k-instruct-bnb-4bit",
  "architectures": [
    "MistralForCausalLM"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 17920,
  "max_position_embeddings": 4096,
  "model_type": "mistral",
  "num_attention_heads": 40,
  "num_hidden_layers": 40,
  "num_key_value_heads": 10,
  "pad_token_id": 32009,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": true,
    "llm_int8_enable_fp32_cpu_offload": false,
    "l

==((====))==  Unsloth 2024.8: Fast Mistral patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.27. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--unslothai--colab/snapshots/20f9daee9da18936efa03ad4e1361884c60cca0c/config.json
Model config LlamaConfig {
  "_name_or_path": "unslothai/colab",
  "architectures": [
    "LlamaModel"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 0,
  "initializer_range": 0.02,
  "intermediate_size": 0,
  "max_position_embeddings": 0,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 0,
  "num_hidden_layers": 0,
  "num_key_value_heads": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
  "use_cache": true,
  "vocab_size": 0
}

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--unslothai--colab/snapshots/20f9daee9da189

model.safetensors.index.json:   0%|          | 0.00/165k [00:00<?, ?B/s]

loading weights file model.safetensors from cache at /root/.cache/huggingface/hub/models--unsloth--phi-3-medium-4k-instruct-bnb-4bit/snapshots/728d4af382a25baa9da3a5f179d3ff2cf98ddc3e/model.safetensors.index.json


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/3.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.72G [00:00<?, ?B/s]

Instantiating MistralForCausalLM model under default dtype torch.float16.
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 32000,
  "pad_token_id": 32009
}

target_dtype {target_dtype} is replaced by `CustomDtype.INT4` for 4-bit BnB quantization


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing MistralForCausalLM.

All the weights of MistralForCausalLM were initialized from the model checkpoint at unsloth/phi-3-medium-4k-instruct-bnb-4bit.
If your task is similar to the task the model of the checkpoint was trained on, you can already use MistralForCausalLM for predictions without further training.


generation_config.json:   0%|          | 0.00/194 [00:00<?, ?B/s]

loading configuration file generation_config.json from cache at /root/.cache/huggingface/hub/models--unsloth--phi-3-medium-4k-instruct-bnb-4bit/snapshots/728d4af382a25baa9da3a5f179d3ff2cf98ddc3e/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": [
    32000,
    32001,
    32007
  ],
  "max_length": 4096,
  "pad_token_id": 32009
}



tokenizer_config.json:   0%|          | 0.00/3.25k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/458 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.8 patched 40 layers with 40 QKV layers, 40 O layers and 40 MLP layers.


<a name="Data"></a>
### Data Prep
We now use the Alpaca dataset from [yahma](https://huggingface.co/datasets/yahma/alpaca-cleaned), which is a filtered version of 52K of the original [Alpaca dataset](https://crfm.stanford.edu/2023/03/13/alpaca.html). You can replace this code section with your own data prep.

**[NOTE]** To train only on completions (ignoring the user's input) read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).

**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!! Otherwise you'll get infinite generations!

If you want to use the `llama-3` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1XamvWYinY6FOSX9GLvnqSjjsNflxdhNc?usp=sharing).

For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing).

## Custom synthetic dataset

In [None]:
!pip install datasets



In [None]:
import pandas as pd
from datasets import load_dataset
# Load your CSV into a DataFrame (adjust column names if needed)
# Convert DataFrame to a Dataset
dataset_training = load_dataset('csv', data_files='/content/training_dataset.csv')

In [None]:
# Convert DataFrame to a Dataset
dataset_testing = load_dataset('csv', data_files='/content/testing_dataset.csv')

In [None]:
dataset_testing

DatasetDict({
    train: Dataset({
        features: ['Label', 'Event ID', 'Service Name', 'Service File Name', 'Service Type', 'Service Start Type', 'Service Account', 'Data Service Name', 'Timestamp', 'ID'],
        num_rows: 9976
    })
})

In [None]:
# Print the data types of the features in the test set
print(dataset_testing["train"].features)


{'Label': Value(dtype='string', id=None), 'Event ID': Value(dtype='string', id=None), 'Service Name': Value(dtype='string', id=None), 'Service File Name': Value(dtype='string', id=None), 'Service Type': Value(dtype='string', id=None), 'Service Start Type': Value(dtype='string', id=None), 'Service Account': Value(dtype='string', id=None), 'Data Service Name': Value(dtype='string', id=None), 'Timestamp': Value(dtype='string', id=None), 'ID': Value(dtype='string', id=None)}


In [None]:
# Print the data types of the features in the test set
print(dataset_training["train"].features)

{'Label': Value(dtype='string', id=None), 'Event ID': Value(dtype='string', id=None), 'Service Name': Value(dtype='string', id=None), 'Service File Name': Value(dtype='string', id=None), 'Service Type': Value(dtype='string', id=None), 'Service Start Type': Value(dtype='string', id=None), 'Service Account': Value(dtype='string', id=None), 'Data Service Name': Value(dtype='string', id=None), 'Timestamp': Value(dtype='string', id=None), 'ID': Value(dtype='string', id=None)}


In [None]:
# Define the new feature value
instruction_text = "Classify the following service event as benign or malignant"

# Function to add the new feature
def add_instructions(examples):
    # Add the 'instructions' feature with the specified text
    examples['instruction'] = instruction_text
    return examples

# Apply the function to the dataset
dataset_testing = dataset_testing.map(add_instructions)

In [None]:
dataset_training = dataset_training.map(add_instructions)

In [None]:
dataset_training

DatasetDict({
    train: Dataset({
        features: ['Label', 'Event ID', 'Service Name', 'Service File Name', 'Service Type', 'Service Start Type', 'Service Account', 'Data Service Name', 'Timestamp', 'ID', 'instruction'],
        num_rows: 9976
    })
})

In [None]:
dataset_testing

DatasetDict({
    train: Dataset({
        features: ['Label', 'Event ID', 'Service Name', 'Service File Name', 'Service Type', 'Service Start Type', 'Service Account', 'Data Service Name', 'Timestamp', 'ID', 'instruction'],
        num_rows: 9976
    })
})

In [None]:
# Define the function to combine the columns into a new feature
def combine_features(example):
    # Combine all columns except 'Label' into a single string
    features_text = ", ".join([f"{col}: {example[col]}" for col in dataset_testing['train'].column_names if col != 'Label'])
    # Add this combined text as a new feature
    example['input'] = features_text
    return example

# Apply the function to the dataset
dataset_testing = dataset_testing.map(combine_features)

In [None]:
# Define the function to combine the columns into a new feature
def combine_features(example):
    # Combine all columns except 'Label' into a single string
    features_text = ", ".join([f"{col}: {example[col]}" for col in dataset_training['train'].column_names if col != 'Label'])
    # Add this combined text as a new feature
    example['input'] = features_text
    return example

# Apply the function to the dataset
dataset_training = dataset_training.map(combine_features)

In [None]:
# Check the updated dataset
dataset_testing

DatasetDict({
    train: Dataset({
        features: ['Label', 'Event ID', 'Service Name', 'Service File Name', 'Service Type', 'Service Start Type', 'Service Account', 'Data Service Name', 'Timestamp', 'ID', 'instruction', 'input'],
        num_rows: 9976
    })
})

In [None]:
dataset_training

DatasetDict({
    train: Dataset({
        features: ['Label', 'Event ID', 'Service Name', 'Service File Name', 'Service Type', 'Service Start Type', 'Service Account', 'Data Service Name', 'Timestamp', 'ID', 'instruction', 'input'],
        num_rows: 9976
    })
})

In [None]:
# Specify the columns to drop
columns_to_drop = [
    'Event ID', 'Service Name', 'Service File Name', 'Service Type',
    'Service Start Type', 'Service Account', 'Data Service Name',
    'Timestamp', 'ID'
]

In [None]:
# Drop the specified columns
dataset_training = dataset_training.remove_columns(columns_to_drop)

# Check the updated dataset
dataset_training

DatasetDict({
    train: Dataset({
        features: ['Label', 'instruction', 'input'],
        num_rows: 9976
    })
})

In [None]:
# Drop the specified columns
dataset_testing = dataset_testing.remove_columns(columns_to_drop)

# Check the updated dataset
dataset_testing

DatasetDict({
    train: Dataset({
        features: ['Label', 'instruction', 'input'],
        num_rows: 9976
    })
})

In [None]:
# Rename the 'Label' column to 'output'
dataset_training = dataset_training.rename_column('Label', 'output')
dataset_training

DatasetDict({
    train: Dataset({
        features: ['output', 'instruction', 'input'],
        num_rows: 9976
    })
})

In [None]:
# Rename the 'Label' column to 'output'
dataset_testing = dataset_testing.rename_column('Label', 'output')
dataset_testing

DatasetDict({
    train: Dataset({
        features: ['output', 'instruction', 'input'],
        num_rows: 9976
    })
})

In [None]:
from datasets import Dataset, DatasetDict

dataset = DatasetDict({
    'train': dataset_training['train'],
    'test': dataset_testing['train']
})

In [None]:
dataset

DatasetDict({
    train: Dataset({
        features: ['output', 'instruction', 'input'],
        num_rows: 9976
    })
    test: Dataset({
        features: ['output', 'instruction', 'input'],
        num_rows: 9976
    })
})

In [None]:
from huggingface_hub import HfApi, HfFolder

# Log in to Hugging Face
HfFolder.save_token("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")

# Upload the dataset to Hugging Face
dataset_id = "BMarz/ya2"  # Replace with your username and desired dataset name

dataset.push_to_hub(dataset_id)

NameError: name 'dataset' is not defined

In [None]:
from datasets import load_dataset
dataset = load_dataset("BMarz/ya2", split = "train")

In [None]:
dataset['input']

['Event ID: 7045, Service Name: WSearch, Service File Name: C:\\Windows\\System32\\SearchIndexer.exe, Service Type: Own Process, Service Start Type: Auto Start, Service Account: LocalSystem, Data Service Name: WindowsSearch, Timestamp: 2024-09-02T08:41:19.219Z, ID: 41f59d6e-82a5-4753-9b0e-e5a86a411eY, instruction: Classify the following service event as benign or malignant',
 'Event ID: 7045, Service Name: BackupService, Service File Name: C:\\Program Files\\Microsoft Backup\\backup.exe, Service Type: Own Process, Service Start Type: Auto Start, Service Account: NT AUTHORITY\\SYSTEM, Data Service Name: WindowsBackup, Timestamp: 2024-03-22T10:47:13.219Z, ID: 4a6b4e3f-1167-42a4-8a4f-7382f4e5bc23Y, instruction: Classify the following service event as benign or malignant',
 'Event ID: 7045, Service Name: NetmanService, Service File Name: C:\\Windows\\System32\\netman.dll, Service Type: Shared Process, Service Start Type: Demand Start, Service Account: NT AUTHORITY\\LocalService, Data Servi

In [None]:
dataset

Dataset({
    features: ['output', 'instruction', 'input', 'text'],
    num_rows: 9976
})

In [None]:
alpaca_prompt = """You are an expert in cybersecurity. You can idenitfy which Windows Event ID 7045 log entries are malignant (hamrful) to the system, and which are benign (generted by Windows or autheticated services). Below is an instruction that describes a task, paired with an input that provides further context to the type of features the service has. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
#pass

In [None]:
dataset = dataset.map(formatting_prompts_func, batched = True,)

Map:   0%|          | 0/9976 [00:00<?, ? examples/s]

#### Additional by me

In [None]:
# Split the dataset into training and testing sets
dataset_dict = dataset.train_test_split(test_size=0.005)

In [None]:
train_dataset = dataset_dict['train']
eval_dataset = dataset_dict['test']

In [None]:
train_dataset

Dataset({
    features: ['output', 'instruction', 'input', 'text'],
    num_rows: 9926
})

In [None]:
eval_dataset

Dataset({
    features: ['output', 'instruction', 'input', 'text'],
    num_rows: 50
})

#### Monitoring Fine-Tuning with W&B
Weights & Biases (W&B) is an essential tool for tracking your model's training process and system resource usage. It helps visualize metrics in real time, providing valuable insights into both model performance and GPU utilization.

We'll use W&B to monitor our training process, including evaluation metrics and resource usage:

In [None]:
!pip install wandb



In [None]:
import wandb

# Log in to W&B - you'll be prompted to input your API key
wandb.login()



True

In [None]:
# Set W&B environment variables
%env WANDB_WATCH=all
%env WANDB_SILENT=true

env: WANDB_WATCH=all
env: WANDB_SILENT=true


You can sign up for W&B and get your API key. This setup will allow you to track all the important metrics in real-time.
https://docs.wandb.ai/quickstart

#### Training TinyLLaMA with W&B Integration
Now that everything is set up, it’s time to train the TinyLLaMA model. We'll be using the SFTTrainer from the trl library, along with Weights & Biases (W&B) for real-time tracking of training metrics and resource usage. This step ensures you can monitor your training effectively and make necessary adjustments on the fly.

####*Initializing W&B and Setting Training Arguments*
First, we initialize W&B and set up the training arguments:

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from transformers.utils import logging
import wandb

logging.set_verbosity_info()

# Initialize W&B
project_name = "tiny-llama"
entity = "wandb"
wandb.init(project=project_name, name="unsloth-tiny-llama")

# Define training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=2,           # Small batch size due to limited GPU memory
    gradient_accumulation_steps=4,           # Accumulate gradients over 4 steps
    evaluation_strategy="steps",             # Evaluate after a certain number of steps
    warmup_ratio=0.1,                        # Warm-up learning rate over 10% of training
    num_train_epochs=1,                      # Number of epochs
    learning_rate=2e-4,                      # Learning rate for the optimizer
    fp16=not is_bfloat16_supported(),        # Use FP16 if BF16 is not supported
    bf16=is_bfloat16_supported(),            # Use BF16 if supported (more efficient on Ampere GPUs)
    max_steps=20,                            # Cap training at 20 steps for quick experimentation, increase or comment out as you see fit
    logging_steps=1,                         # Log metrics every step
    optim="adamw_8bit",                      # Use 8-bit AdamW optimizer to save memory
    weight_decay=0.1,                        # Regularization to avoid overfitting
    lr_scheduler_type="linear",              # Use linear learning rate decay
    seed=3407,                               # Random seed for reproducibility
    report_to="wandb",                       # Enable logging to W&B
    output_dir="outputs",                    # Directory to save model outputs
)

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

using `logging_steps` to initialize `eval_steps` to 1
PyTorch: setting up devices


In [None]:
"""trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,     # Training dataset
    eval_dataset=eval_dataset,       # Evaluation dataset
    dataset_text_field="text",               # The field containing text in the dataset
    max_seq_length=max_seq_length,           # Max sequence length for inputs
    dataset_num_proc=2,                      # Number of processes for dataset loading
    packing=True,                            # Packs short sequences together to save time
    args=training_args)                      # Training arguments defined earlier"""

PyTorch: setting up devices


Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
Using auto half precision backend


Next, we set up the SFTTrainer:

In [None]:
# Start training the model
#trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,454 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 20
 "-____-"     Number of trainable parameters = 65,536,000
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss,Validation Loss



***** Running Evaluation *****
  Num examples = 7
  Batch size = 8


OutOfMemoryError: CUDA out of memory. Tried to allocate 70.00 MiB. GPU 

In [None]:
# Start training the model
trainer.train()

# Finish and close the W&B session
wandb.finish()

SyntaxError: incomplete input (<ipython-input-21-c6da8748483b>, line 10)

<a name="Train"></a>
### Train the model
Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We do 60 steps to speed things up, but you can set `num_train_epochs=1` for a full run, and turn off `max_steps=None`. We also support TRL's `DPOTrainer`!

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
PyTorch: setting up devices


Map (num_proc=2):   0%|          | 0/9976 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
Using auto half precision backend


In [None]:
#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.748 GB.
14.553 GB of memory reserved.


In [None]:
from peft import LoraConfig, get_peft_model
# Assuming `model` is your base model, e.g., LlamaForCausalLM
# Define the LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "k_proj", "embed_tokens", "lm_head"],
    lora_dropout=0.1,
    bias="none"
)

# Apply LoRA to the base model
model = get_peft_model(model, lora_config)

In [None]:
# Now proceed with the training
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 9,976 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 60,806,144
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss
1,2.4269
2,2.3278
3,2.3448
4,2.224
5,2.0079
6,2.0067
7,1.7664
8,1.5676
9,1.517
10,1.3728


Saving model checkpoint to outputs/checkpoint-60


Training completed. Do not forget to share your model on huggingface.co/models =)




In [None]:
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

2151.4726 seconds used for training.
35.86 minutes used for training.
Peak reserved memory = 14.57 GB.
Peak reserved memory for training = 0.017 GB.
Peak reserved memory % of max memory = 98.793 %.
Peak reserved memory for training % of max memory = 0.115 %.


<a name="Inference"></a>
### Inference
Let's run the model! You can change the instruction and input - leave the output blank!

**[NEW] Try 2x faster inference in a free Colab for Llama-3.1 8b Instruct [here](https://colab.research.google.com/drive/1T-YBVfnphoVc8E2E854qF3jdia2Ll2W2?usp=sharing)**

In [None]:
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Classify the following service event as benign or malignant.", # instruction
        "7045, NetmanService, C:\Windows\System32\netman.dll, Shared Process, Demand Start, NT AUTHORITY\LocalService, WindowsUpdate, 2024-09-02T08:45:12.219Z, 4c7a23e1-9b83-46a8-9d12-8a5f4e1fY", # input
        "", # output - leave this blank for generation! beign
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['You are an expert in cybersecurity. You can idenitfy which Windows Event ID 7045 log entries are malignant (hamrful) to the system, and which are benign (generted by Windows or autheticated services). Below is an instruction that describes a task, paired with an input that provides further context to the type of features the service has. Write a response that appropriately completes the request.\n\n### Instruction:\nClassify the following service event as benign or malignant.\n\n### Input:\n7045, NetmanService, C:\\Windows\\System32\netman.dll, Shared Process, Demand Start, NT AUTHORITY\\LocalService, WindowsUpdate, 2024-09-02T08:45:12.219Z, 4c7a23e1-9b83-46a8-9d12-8a5f4e1fY\n\n### Response:\nbenign<|endoftext|>']

 You can also use a `TextStreamer` for continuous inference - so you can see the generation token by token, instead of waiting the whole time!

In [None]:
# alpaca_prompt = Copied from above
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Classify the following service event as benign or malignant.", # instruction
        "7045, NetmanService, C:\Windows\System32\netman.dll, Shared Process, Demand Start, NT AUTHORITY\LocalService, WindowsUpdate, 2024-09-02T08:45:12.219Z, 4c7a23e1-9b83-46a8-9d12-8a5f4e1fY", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

You are an expert in cybersecurity. You can idenitfy which Windows Event ID 7045 log entries are malignant (hamrful) to the system, and which are benign (generted by Windows or autheticated services). Below is an instruction that describes a task, paired with an input that provides further context to the type of features the service has. Write a response that appropriately completes the request.

### Instruction:
Classify the following service event as benign or malignant.

### Input:
7045, NetmanService, C:\Windows\System32
etman.dll, Shared Process, Demand Start, NT AUTHORITY\LocalService, WindowsUpdate, 2024-09-02T08:45:12.219Z, 4c7a23e1-9b83-46a8-9d12-8a5f4e1fY

### Response:
benign<|endoftext|>


<a name="Save"></a>
### Saving, loading finetuned models
To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.

**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!

In [None]:
# prompt: how to connect my google drive

from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
model.save_pretrained("/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model") # Local saving
tokenizer.save_pretrained("/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model")



('/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model/tokenizer_config.json',
 '/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model/special_tokens_map.json',
 '/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model/tokenizer.model',
 '/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model/added_tokens.json',
 '/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model/tokenizer.json')

In [None]:
model.push_to_hub("BMarz/pi_model", token = "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") # Online saving
tokenizer.push_to_hub("BMarz/pi_model", token = "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") # Online saving

Uploading the following files to BMarz/pi_model: adapter_config.json,README.md,adapter_model.safetensors


  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/900M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Uploading the following files to BMarz/pi_model: tokenizer.json,special_tokens_map.json,tokenizer.model,README.md,added_tokens.json,tokenizer_config.json


  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

In [None]:
#model.save_pretrained("/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model") # Local saving
#tokenizer.save_pretrained("/content/drive/MyDrive/Colab Notebooks/hackathon/saved model/pi_model")
# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')

Now if you want to load the LoRA adapters we just saved for inference, set `False` to `True`:

In [None]:
if False:
    from unsloth import FastLanguageModel
    model, tokenizer = FastLanguageModel.from_pretrained(
        #model_name = "lora_model", # YOUR MODEL YOU USED FOR TRAINING
        model_name = "pi_model", # YOUR MODEL YOU USED FOR TRAINING
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = load_in_4bit,
    )
    FastLanguageModel.for_inference(model) # Enable native 2x faster inference

alpaca_prompt = """You are an expert in cybersecurity. You can idenitfy which Windows Event ID 7045 log entries are malignant (hamrful) to the system, and which are benign (generted by Windows or autheticated services). Below is an instruction that describes a task, paired with an input that provides further context to the type of features the service has. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

inputs = tokenizer(
[
    alpaca_prompt.format(
        "Classify the following service event as benign or malignant", # instruction
        "malicious, 7045, SuspiciousStartup, %windir%\system32\cmd.exe /c powershell -enc JABzAGUAYwB... ( encoded command ), Own Process, Auto Star, LocalSystem, tG4, 2024-08-20T08:41:12.219Z, 42f51f8c-1234-4422-9a87-1a2b3c4d5ex", # input
        "", # output - leave this blank for generation! malcicious, i mistakenly mentioned in template malignant
    )
], return_tensors = "pt").to("cuda")

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

You are an expert in cybersecurity. You can idenitfy which Windows Event ID 7045 log entries are malignant (hamrful) to the system, and which are benign (generted by Windows or autheticated services). Below is an instruction that describes a task, paired with an input that provides further context to the type of features the service has. Write a response that appropriately completes the request.

### Instruction:
Classify the following service event as benign or malignant

### Input:
malicious, 7045, SuspiciousStartup, %windir%\system32\cmd.exe /c powershell -enc JABzAGUAYwB... ( encoded command ), Own Process, Auto Star, LocalSystem, tG4, 2024-08-20T08:41:12.219Z, 42f51f8c-1234-4422-9a87-1a2b3c4d5ex

### Response:
malicious<|endoftext|>


You can also use Hugging Face's `AutoModelForPeftCausalLM`. Only use this if you do not have `unsloth` installed. It can be hopelessly slow, since `4bit` model downloading is not supported, and Unsloth's **inference is 2x faster**.

In [None]:
if False:
    # I highly do NOT suggest - use Unsloth if possible
    from peft import AutoPeftModelForCausalLM
    from transformers import AutoTokenizer
    model = AutoPeftModelForCausalLM.from_pretrained(
        "lora_model", # YOUR MODEL YOU USED FOR TRAINING
        load_in_4bit = load_in_4bit,
    )
    tokenizer = AutoTokenizer.from_pretrained("lora_model")

### Saving to float16 for VLLM

We also support saving to `float16` directly. Select `merged_16bit` for float16 or `merged_4bit` for int4. We also allow `lora` adapters as a fallback. Use `push_to_hub_merged` to upload to your Hugging Face account! You can go to https://huggingface.co/settings/tokens for your personal tokens.

In [None]:
# Merge to 16bit
if False: model.save_pretrained_merged("pi2_model", tokenizer, save_method = "merged_16bit",)
if False: model.push_to_hub_merged("BMarz/pi2_model", tokenizer, save_method = "merged_16bit", token = "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")

# Merge to 4bit
if False: model.save_pretrained_merged("pi2_model", tokenizer, save_method = "merged_4bit",)
if False: model.push_to_hub_merged("BMarz/pi2_model", tokenizer, save_method = "merged_4bit", token = "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")

# Just LoRA adapters
if False: model.save_pretrained_merged("pi2_model", tokenizer, save_method = "lora",)
if False: model.push_to_hub_merged("BMarz/mopi2_modeldel", tokenizer, save_method = "lora", token = "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")