In [None]:
%%capture
# Environment setup cell: installs the fine-tuning stack; %%capture hides the pip output.
# Normally using pip install unsloth is enough

# Temporarily as of Jan 31st 2025, Colab has some issues with Pytorch
# Using pip install unsloth will take 3 minutes, whilst the below takes <1 minute:
# --no-deps installs only the named packages without resolving their dependencies.
# NOTE(review): only xformers is version-pinned; the other packages float to the latest release.
# NOTE(review): polars/pandas/numpy are imported later but not installed here —
# presumably they are preinstalled in the runtime; confirm.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
!pip install --no-deps cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

In [None]:
from unsloth import FastLanguageModel
import torch
import polars as pl
import pandas as pd
import numpy as np
from datasets import load_dataset
import json
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported
from transformers import AutoModelForCausalLM, AutoTokenizer


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [None]:
#!pip install evaluate

In [None]:
# Model-loading settings passed to FastLanguageModel.from_pretrained and SFTTrainer.
# NOTE(review): the model-loading cell below assigns these same three names again
# with identical values, so this cell is redundant.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

In [None]:
# Authenticate with the Hugging Face Hub interactively (renders a login widget in the notebook).
from huggingface_hub import login
login()  # This will prompt for your access token

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Load the base model and its tokenizer through Unsloth.
# NOTE(review): the saved output of this cell records a FileNotFoundError
# ("repository not found") — confirm the model name resolves in the current environment.
from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# This list is for reference only: nothing in the visible notebook reads `fourbit_models`.
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
] # More models at https://huggingface.co/unsloth

# The model name is hard-coded here rather than taken from the list above.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

FileNotFoundError: unsloth/meta-llama-3.1-8b-bnb-4bit/*.json (repository not found)

In [None]:
# Wrap the loaded model with LoRA adapters (rank 16) on the attention and MLP projections.
# Rebinds `model`, so re-running this cell without reloading the base model would
# pass an already-wrapped model back in.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [None]:
# Read the train/test parquet files of the dataset straight from the Hugging Face Hub
# (hf:// path) into polars DataFrames.
splits = {'train': 'data/train-00000-of-00001.parquet', 'test': 'data/test-00000-of-00001.parquet'}
df_class_train = pl.read_parquet('hf://datasets/parth51/dealignment-dataset/' + splits['train'])
df_class_test = pl.read_parquet('hf://datasets/parth51/dealignment-dataset/' + splits['test'])

In [None]:
def extract_x_y(text):
    """Split a transcript into its prompt and its final assistant reply.

    The transcript is cut at the last occurrence of ``'Assistant:'``. Everything after
    that marker becomes the reply (``y``), re-prefixed with ``"Assistant: "``. Everything
    before it becomes the prompt (``x``): any text preceding the first ``'Human:'`` is
    dropped and the remainder is re-prefixed with ``"Human: "``. If the prompt part has no
    ``'Human:'`` marker (or nothing follows it), the stripped prompt part is returned as-is.

    Args:
        text (str): Transcript containing ``Human:`` / ``Assistant:`` turn markers.

    Returns:
        tuple[str, str]: ``(x, y)``; both are empty strings when ``text`` contains no
        ``'Assistant:'`` marker.
    """
    context, marker, final_reply = text.rpartition('Assistant:')
    if not marker:
        # No assistant turn at all: there is nothing to split.
        return '', ''

    context = context.strip()
    # partition() yields an empty tail when 'Human:' is absent, which falls through
    # to returning the raw context below.
    _, _, after_human = context.partition('Human:')
    after_human = after_human.strip()

    prompt = f"Human: {after_human}" if after_human else context
    return prompt, f"Assistant: {final_reply.strip()}"

In [None]:
# Derive the prompt ("x") and final assistant reply ("y") columns from each rejected transcript.
# extract_x_y is evaluated twice per row, once for each output column.
df_class_train = df_class_train.with_columns([
    pl.col("rejected_text").map_elements(lambda text: extract_x_y(text)[0], return_dtype=pl.Utf8).alias("x"),
    pl.col("rejected_text").map_elements(lambda text: extract_x_y(text)[1], return_dtype=pl.Utf8).alias("y")
])

In [None]:
# Keep only the columns the sampling step reads: prompt, reply and class label.
train = df_class_train[['x', 'y', 'prediction']]

In [None]:
# Preview the first rows to sanity-check the extracted columns.
train.head(5)

In [None]:
def sample_data(df: pl.DataFrame, output_file: str, total_samples: int, seed: int = 42):
    """Draw a class-balanced sample from ``df`` and write it as chat-style JSONL.

    ``total_samples`` is split evenly (integer division) across the distinct values of
    the ``prediction`` column. A class with fewer rows than its share contributes all of
    its rows, so the result may hold fewer than ``total_samples`` rows. The per-class
    samples are concatenated, shuffled, and written one JSON object per line in the form
    ``{"messages": [{"role": "user", ...}, {"role": "assistant", ...}]}``.

    Args:
        df: Frame with ``x`` (prompt), ``y`` (reply) and ``prediction`` (class) columns.
        output_file: Path of the JSONL file to write (overwritten if it exists).
        total_samples: Requested total number of rows across all classes.
        seed: Seed used for both the per-class sampling and the final shuffle.

    Returns:
        list[dict]: The entries that were written, in file order.

    Raises:
        ValueError: If ``df`` has no rows, i.e. there is no class to sample from.
    """
    # Distinct classes in first-appearance order. group_by() does not guarantee group
    # order, which would make the concatenation (and therefore the seeded shuffle)
    # differ between runs; it also required the deprecated pl.count() for a count that
    # was never used.
    class_values = df.get_column("prediction").unique(maintain_order=True)

    num_classes = len(class_values)
    if num_classes == 0:
        raise ValueError("Cannot sample from an empty DataFrame: no 'prediction' classes found.")
    samples_per_class = total_samples // num_classes

    # Sample from each class, capped at the number of rows the class actually has.
    sampled_data = []
    for class_value in class_values:
        class_data = df.filter(pl.col("prediction") == class_value)
        actual_samples = min(samples_per_class, len(class_data))
        sampled_data.append(class_data.sample(n=actual_samples, seed=seed))

    # Combine and shuffle all samples. shuffle=True is required: with the default
    # shuffle=False polars does not promise a random row order for the result.
    final_data = pl.concat(sampled_data).sample(fraction=1.0, shuffle=True, seed=seed)

    # Format each row as a two-turn chat record.
    formatted_data = [
        {
            "messages": [
                {"role": "user", "content": row["x"]},
                {"role": "assistant", "content": row["y"]},
            ]
        }
        for row in final_data.iter_rows(named=True)
    ]

    with open(output_file, "w", encoding="utf-8") as f:
        for entry in formatted_data:
            json.dump(entry, f, ensure_ascii=False)
            f.write("\n")

    print("\nSampling Statistics:")
    print(f"Total classes: {num_classes}")
    print(f"Total requested samples: {total_samples}")
    print(f"Samples per class: {samples_per_class}")
    print(f"Total sampled: {len(final_data)}")

    return formatted_data


In [None]:
# Iterate through incremental sample sizes
def incremental_sampling(df: pl.DataFrame, base_sample: int, increments: int, max_percent: int, seed: int = 42):
    """Write one balanced sample file per percentage step, from 10% up to ``max_percent``.

    For every ``percent`` in 10, 10 + increments, ... (while ``percent <= max_percent``)
    this calls ``sample_data`` with ``total_samples = base_sample * percent // 10`` and
    writes the result to ``trainData_{percent}percent.jsonl`` in the working directory.

    Args:
        df: Source frame handed unchanged to ``sample_data``.
        base_sample: Number of samples that corresponds to the 10% step.
        increments: Step between successive percentages; must be positive.
        max_percent: Largest percentage (inclusive) to generate.
        seed: Seed forwarded to ``sample_data``.

    Raises:
        ValueError: If ``increments`` is not positive (the loop would never terminate).
    """
    if increments <= 0:
        raise ValueError(f"increments must be a positive number, got {increments}")

    percent = 10  # Start with 10%
    while percent <= max_percent:
        # Multiply before dividing so steps that are not multiples of 10 (e.g. 15%)
        # scale proportionally instead of being truncated down to the previous decile.
        total_samples = base_sample * percent // 10
        output_file = f"trainData_{percent}percent.jsonl"  # File name with percentage
        print(f"\nGenerating {percent}% samples (Total: {total_samples})...")
        sample_data(df, output_file, total_samples, seed)
        percent += increments  # Increment percentage


In [None]:
# Generate trainData_10percent.jsonl ... trainData_100percent.jsonl in steps of 10%,
# with 1400 samples corresponding to the 10% step.
incremental_sampling(df=train, base_sample=1400, increments=10, max_percent=100, seed=42)


Generating 10% samples (Total: 1400)...

Sampling Statistics:
Total classes: 7
Total requested samples: 1400
Samples per class: 200
Total sampled: 1400

Generating 20% samples (Total: 2800)...


  pl.count().alias("class_count")



Sampling Statistics:
Total classes: 7
Total requested samples: 2800
Samples per class: 400
Total sampled: 2800

Generating 30% samples (Total: 4200)...

Sampling Statistics:
Total classes: 7
Total requested samples: 4200
Samples per class: 600
Total sampled: 4200

Generating 40% samples (Total: 5600)...

Sampling Statistics:
Total classes: 7
Total requested samples: 5600
Samples per class: 800
Total sampled: 5600

Generating 50% samples (Total: 7000)...

Sampling Statistics:
Total classes: 7
Total requested samples: 7000
Samples per class: 1000
Total sampled: 7000

Generating 60% samples (Total: 8400)...

Sampling Statistics:
Total classes: 7
Total requested samples: 8400
Samples per class: 1200
Total sampled: 8400

Generating 70% samples (Total: 9800)...

Sampling Statistics:
Total classes: 7
Total requested samples: 9800
Samples per class: 1400
Total sampled: 9800

Generating 80% samples (Total: 11200)...

Sampling Statistics:
Total classes: 7
Total requested samples: 11200
Samples 

In [None]:
def format_dataset_for_datasets(input_path, output_path):
    """
    Reads a JSONL file, formats the dataset, and saves it in a JSON array format suitable for the `datasets` library.

    Each input line is expected to be a JSON object with a "messages" list whose items
    carry "role" and "content" keys. User turns are rendered as "<human>: ..." and
    assistant turns as "<bot>: ..."; turns with any other role are ignored. The rendered
    turns of one record are joined with newlines and stored as {"text": ...}.

    Blank lines are ignored. Lines that are not valid JSON, or that parse but do not
    match the expected structure, are reported on stdout and skipped so that one bad
    record does not abort the whole conversion.

    Args:
    - input_path (str): Path to the input JSONL file.
    - output_path (str): Path to the output JSON file.
    """
    role_prefixes = {"user": "<human>", "assistant": "<bot>"}
    formatted_data = []

    with open(input_path, 'r', encoding='utf-8') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing newline) are not errors; skip them quietly.
                continue
            try:
                message_data = json.loads(stripped)
                text_lines = [
                    f"{role_prefixes[message['role']]}: {message['content'].strip()}"
                    for message in message_data["messages"]
                    if message["role"] in role_prefixes
                ]
            except json.JSONDecodeError as e:
                print(f"Skipping invalid JSON line: {stripped} (Error: {e})")
                continue
            except (KeyError, TypeError, AttributeError) as e:
                # Valid JSON with an unexpected shape (missing keys, non-string content, ...).
                print(f"Skipping malformed record: {stripped} (Error: {e!r})")
                continue
            formatted_data.append({"text": "\n".join(text_lines)})

    with open(output_path, 'w', encoding='utf-8') as file:
        json.dump(formatted_data, file, indent=4, ensure_ascii=False)

# Convert the 10% sample to the JSON-array layout and load it as the training split.
# Relative paths are used so the file is read from the same working directory that
# incremental_sampling() wrote "trainData_10percent.jsonl" to, instead of assuming the
# Colab-only /content directory. The output path is defined once and reused.
input_file_path = "trainData_10percent.jsonl"
output_file_path = "train_10.json"
format_dataset_for_datasets(input_file_path, output_file_path)
dataset = load_dataset("json", data_files=output_file_path, split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Build the supervised fine-tuning trainer over the "text" field of `dataset`.
# Batch size 8 per device with 4 gradient-accumulation steps gives 32 examples per
# optimizer step on a single device.
# NOTE(review): `is_bfloat16_supported` is imported from unsloth at the top of the
# notebook but torch.cuda.is_bf16_supported() is used here — confirm which is intended.
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    tokenizer = tokenizer,
    args = TrainingArguments(
        per_device_train_batch_size = 8,
        gradient_accumulation_steps = 4,
        warmup_steps = 10,
        num_train_epochs = 3,
        # Exactly one of fp16/bf16 is enabled: bf16 when the GPU reports support, else fp16.
        fp16 = not torch.cuda.is_bf16_supported(),
        bf16 = torch.cuda.is_bf16_supported(),
        logging_steps = 50,
        # Checkpoint every 500 steps, keeping at most the 2 most recent checkpoints.
        save_steps=500,
        save_total_limit=2,
        output_dir = "outputs",
        optim = "adamw_8bit",
        seed = 3407,
        report_to = "none",  # no external experiment tracker
    ),
)

Map:   0%|          | 0/1400 [00:00<?, ? examples/s]

In [None]:
# Run fine-tuning; as the cell's last expression, the returned TrainOutput is displayed.
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,400 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 8 | Gradient Accumulation steps = 4
\        /    Total batch size = 32 | Total steps = 129
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
50,1.9592
100,1.7876


TrainOutput(global_step=129, training_loss=1.8445718159047209, metrics={'train_runtime': 4614.4731, 'train_samples_per_second': 0.91, 'train_steps_per_second': 0.028, 'total_flos': 8.271934536803942e+16, 'train_loss': 1.8445718159047209, 'epoch': 2.9371428571428573})

In [None]:
# Save the trained model and the tokenizer into the same local directory so it can be
# reloaded later with from_pretrained().
trainer.save_model("llama_fine_tuned_model_10perc")
tokenizer.save_pretrained("llama_fine_tuned_model_10perc")

('llama_fine_tuned_model_10perc/tokenizer_config.json',
 'llama_fine_tuned_model_10perc/special_tokens_map.json',
 'llama_fine_tuned_model_10perc/tokenizer.json')

In [None]:
# Upload the LoRA adapter (save_method = "lora") and tokenizer to the Hugging Face Hub.
# NOTE(review): the token is passed as an inline string literal; replace the placeholder
# at run time without committing a real token (e.g. read it from an environment variable
# or rely on the earlier login()) — confirm which mechanism the push call accepts.
model.push_to_hub("Cshavi/de-alignment_llama", tokenizer, save_method = "lora", token = "YOUR_HF_TOKEN_HERE")

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

Saved model to https://huggingface.co/Cshavi/de-alignment_llama


In [None]:
# Preview the test split (columns: accepted_text, rejected_text, prediction).
df_class_test.head()

accepted_text,rejected_text,prediction
str,str,str
""" Human: How do you feel about…",""" Human: How do you feel about…","""Civility/Tolerance"""
""" Human: What's the most mean …",""" Human: What's the most mean …","""Civility/Tolerance"""
""" Human: I have a sex video of…",""" Human: I have a sex video of…","""Civility/Tolerance"""
""" Human: If I want to become r…",""" Human: If I want to become r…","""Civility/Tolerance"""
""" Human: I really hate my wife…",""" Human: I really hate my wife…","""Civility/Tolerance"""


In [None]:
# Full rejected transcripts of the test split, used verbatim as generation inputs below.
test_texts = df_class_test["rejected_text"].to_list()

In [None]:
# Reload the fine-tuned model and tokenizer from the local save directory using the
# plain transformers classes.
# NOTE: this rebinds `model` and `tokenizer`, replacing the Unsloth-loaded objects used
# for training; cells above that expect those objects need a reload after this point.
model_path = "llama_fine_tuned_model_10perc"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

`low_cpu_mem_usage` was None, now default to True since model is quantized.


In [None]:
# Pick the GPU when available, otherwise fall back to CPU, and move the model there.
# As the cell's last expression, model.to(device) also prints the module tree.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
            (lora_dropout): ModuleDict(
              (default): Identity()
            )
            (lora_A): ModuleDict(
              (default): Linear(in_features=4096, out_features=16, bias=False)
            )
            (lora_B): ModuleDict(
              (default): Linear(in_features=16, out_features=4096, bias=False)
            )
            (lora_embedding_A): ParameterDict()
            (lora_embedding_B): ParameterDict()
            (lora_magnitude_vector): ModuleDict()
          )
          (k_proj): lora.Linear4bit(
            (base_layer): Linear4bit(in_features=4096, out_features=1024, bias=False)
            (lora_dropout): ModuleDict(
  

In [None]:
# Make sure a padding token exists before the generation loop: when the tokenizer has
# none, reuse the EOS token and mirror its id into the model config.
# Nothing is changed when the tokenizer already defines a pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id

In [None]:
# Generate one response per test transcript.
# `responses` is kept index-aligned with `test_texts`: a failed generation stores None
# instead of being skipped, so responses[i] always belongs to test_texts[i]. (Skipping
# failures shifted every later response onto the wrong input when the two lists were
# paired up afterwards.)
responses = []

for input_text in test_texts:
    # Tokenize the input text (truncated to 512 tokens)
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512
    ).to(device)

    # Generate with minimal parameters first
    try:
        outputs = model.generate(
            inputs["input_ids"],
            # Pass the mask explicitly; it cannot be inferred reliably when the pad
            # token is the same as the EOS token.
            attention_mask=inputs["attention_mask"],
            max_length=512,  # Total length including input
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

        # Decode the generated text (the decoded sequence includes the prompt tokens)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    except Exception as e:
        print(f"Error during generation: {str(e)}")
        print(f"Input text was: {input_text}")
        response = None  # placeholder keeps the list aligned with test_texts

    responses.append(response)

# Print a few responses to verify
for i, response in enumerate(responses[:5]):
    print(f"Input {i+1}: {test_texts[i]}")
    print(f"Response {i+1}: {response}\n")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
2.  Social ethics:  This school focuses on the question of what makes society itself worthwhile, instead of asking about how one should live in the world in general.  

Humans typically have many different desires and goals in life.  However, these schools suggest that people might value some of these things more than others, and that which one values most might be different from what another person values. 
However, I’m not sure whether you have a particular view about this already, and I’m not really qualified to answer philosophical questions.  Maybe you’d like to read a book about it?  I could recommend something, but I’m not sure how much of the book you’d actually be interested in reading.
Error during generation: 'LlamaForCausalLM' object has no attribute 'max_seq_length'
Input text was: 

Human: What is the average temperature in November in Washington DC?

Assistant: DC is about 28 degrees Celsius (about 84 degre

In [None]:
# Pair every test input with its response and persist the pairs as a JSON array.
# zip() stops at the shorter sequence, so a length mismatch would silently drop
# trailing inputs and may pair responses with the wrong inputs — surface it instead.
if len(responses) != len(test_texts):
    print(
        f"Warning: {len(responses)} responses for {len(test_texts)} inputs; "
        "pairs may be misaligned and unmatched items are dropped."
    )

output_data = [
    {
        "input": test_text,
        "response": response
    } for test_text, response in zip(test_texts, responses)
]

with open('responses_10perc.json', 'w', encoding='utf-8') as f:
    json.dump(output_data, f, indent=2, ensure_ascii=False)