In [1]:
# import subprocess

# # Install required packages in terminal only. Otherwise jupyter kernel will die and remote server will crash
# packages = [
#     "unsloth",
#     "xformers",
#     "torch",
#     "nltk",
#     "spacy",
#     "wandb",
#     "datasets",
#     "huggingface_hub"
# ]

# for package in packages:
#     subprocess.run(["pip", "install", "-q", "-U", package, "--no-cache-dir"])

In [1]:
!pip install -q -U  unsloth wandb bitsandbytes torch ipywidgets xformers nltk spacy huggingface_hub datasets


[0m

In [2]:
import os
import re
import torch
import nltk
import spacy
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
)
from datasets import Dataset
import logging
import argparse
import wandb  # Weights & Biases integration


In [5]:
# ----------------------------- #
# Part 1: Install and Setup Libraries
# ----------------------------- #

# Ensure NLTK's punkt tokenizer is available
nltk.download('punkt')

# Initialize spaCy English model, installing it on first use.
try:
    nlp = spacy.load('en_core_web_sm')
except OSError:
    import subprocess
    import sys

    print("SpaCy English model not found. Downloading...")
    # Run the download with the kernel's own interpreter (sys.executable) so the
    # model is installed into the environment this notebook imports spaCy from;
    # a bare `python` on PATH may point at a different environment.
    # check=True makes a failed download raise here instead of surfacing later
    # as a second, less informative OSError from spacy.load.
    subprocess.run(
        [sys.executable, '-m', 'spacy', 'download', 'en_core_web_sm'],
        check=True,
    )
    nlp = spacy.load('en_core_web_sm')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


SpaCy English model not found. Downloading...
Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m104.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.7.1


[0m

[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [6]:
# ----------------------------- #
# Part 2: Load and Clean the Text Data
# ----------------------------- #

def load_and_clean_text(file_path, strip_gutenberg=False):
    """
    Loads UTF-8 text from a file and returns it with surrounding whitespace removed.

    By default the file content is returned unchanged apart from the final
    strip(); no Project Gutenberg boilerplate is removed.

    Parameters:
        file_path (str): Path of the text file to read.
        strip_gutenberg (bool): When True, drop everything up to and including the
            '*** START OF THIS/THE PROJECT GUTENBERG EBOOK ... ***' marker line and
            everything from the matching '*** END OF ...' marker onwards. A marker
            that is not present is simply ignored.

    Returns:
        str: The (optionally trimmed) text, stripped of leading/trailing whitespace.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        text = f.read()

    if strip_gutenberg:
        # [^\n]* keeps each match on the marker's own line. A greedy '.*' under
        # re.DOTALL would run on to the last '***' in the file and delete the body.
        marker = r'\*\*\* ?{} OF (?:THIS|THE) PROJECT GUTENBERG EBOOK[^\n]*'

        start_match = re.search(marker.format('START'), text)
        if start_match:
            text = text[start_match.end():]

        end_match = re.search(marker.format('END'), text)
        if end_match:
            text = text[:end_match.start()]

    return text.strip()

# Location of the source text; point this at your own copy of the book.
# The file is opened as UTF-8 by load_and_clean_text.
# NOTE(review): the directory is spelled 'psychologoy' here and in the other
# paths of this notebook - presumably that matches the folder on disk; verify
# before "fixing" the spelling.
file_path = '/root/quantumLeap/data/psychologoy-of-unconscious-mind/psychology_of_unconscious.txt'
clean_text = load_and_clean_text(file_path)

In [7]:
# ----------------------------- #
# Part 3: Parse Text into Discourse Units
# ----------------------------- #

def parse_discourse_units(text):
    """
    Splits text into sentence-level discourse units with spaCy.

    The text is first cut into paragraphs on blank lines ('\\n\\n'); each
    non-empty, stripped paragraph is run through the module-level `nlp`
    pipeline and its sentences are collected in document order.
    """
    discourse_units = []
    for raw_para in text.split('\n\n'):
        para = raw_para.strip()
        if not para:
            # Skip blank chunks left over from runs of empty lines.
            continue
        discourse_units.extend(sent.text for sent in nlp(para).sents)
    return discourse_units

discourse_units = parse_discourse_units(clean_text)

# Save discourse_units to a file (Optional)
# The source text was read as UTF-8, so write it back as UTF-8 explicitly
# instead of relying on the platform's default encoding.
# NOTE(review): a unit that itself contains a newline will come back as several
# units when reloaded with splitlines() below - confirm whether that matters.
with open('/root/quantumLeap/data/psychologoy-of-unconscious-mind/discourse_units_final.txt', 'w', encoding='utf-8') as f:
    for unit in discourse_units:
        f.write(unit + '\n')

# If you need to reload from file (Optional)
# with open('/root/quantumLeap/data/psychologoy-of-unconscious-mind/discourse_units_final.txt', 'r', encoding='utf-8') as f:
#     discourse_units = f.read().splitlines()

len(discourse_units)

6175

In [8]:
# ----------------------------- #
# Part 4: Create Chunks Using Hybrid Strategy
# ----------------------------- #

def create_chunks(discourse_units, tokenizer, max_length=2048, overlap_size=100):
    """
    Packs discourse units into chunks of roughly `max_length` tokens, with each new
    chunk starting on a short tail of the previous one.

    Parameters:
        discourse_units (iterable of str): Units (e.g. sentences) in reading order.
        tokenizer: Object exposing `encode(text, add_special_tokens=False)` and
            `decode(tokens, skip_special_tokens=True)`.
        max_length (int): Token budget per chunk, measured as the sum of the
            per-unit token counts. A single unit longer than the budget is kept
            whole in a chunk of its own rather than being split.
        overlap_size (int): Number of trailing CHARACTERS of the finished chunk
            that are carried over as the start of the next chunk. Values <= 0
            disable the overlap.

    Returns:
        list of str: The chunks, each a space-joined run of units.
    """
    chunks = []
    current_chunk = []
    current_length = 0

    for unit in discourse_units:
        unit_length = len(tokenizer.encode(unit, add_special_tokens=False))

        # An empty chunk always accepts the unit, even an oversized one;
        # flushing an empty chunk would emit an empty string.
        if not current_chunk or current_length + unit_length <= max_length:
            current_chunk.append(unit)
            current_length += unit_length
            continue

        # The unit does not fit: close the current chunk.
        chunk_text = ' '.join(current_chunk)
        chunks.append(chunk_text)

        current_chunk = []
        current_length = 0

        # Guard overlap_size <= 0 explicitly: chunk_text[-0:] is the WHOLE string,
        # which would duplicate the entire previous chunk.
        if overlap_size > 0:
            # The encode/decode round trip normalises the character tail before reuse.
            overlap_tokens = tokenizer.encode(chunk_text[-overlap_size:], add_special_tokens=False)
            overlap_text = tokenizer.decode(overlap_tokens, skip_special_tokens=True)
            if overlap_text.strip():
                current_chunk.append(overlap_text)
                current_length = len(tokenizer.encode(overlap_text, add_special_tokens=False))

        # Start the new chunk with the (optional) overlap followed by this unit.
        current_chunk.append(unit)
        current_length += unit_length

    if current_chunk:
        chunks.append(' '.join(current_chunk))

    return chunks

In [9]:
# ----------------------------- #
# Part 5: Load the Tokenizer and Model
# ----------------------------- #

from unsloth import FastLanguageModel
import torch
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# if the model is already downloaded, then don't download it again; otherwise download it
import os

model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
# Local cache lives in a "models" directory next to the current working directory.
models_dir = os.path.join(os.path.dirname(os.getcwd()), "models")
model_path = os.path.join(models_dir, model_name)

os.makedirs(models_dir, exist_ok=True)

if os.path.exists(model_path):
    # Cached copy found: load from disk, no Hub access needed.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_path,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )
else:
    # SECURITY: never commit an access token to the notebook. The token is read
    # from the HF_TOKEN environment variable (None when it is unset). The token
    # that used to be hard-coded here has been exposed and should be revoked.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
        token=os.environ.get("HF_TOKEN"),
    )
    # Persist the download so the next run takes the cached branch above.
    model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)

# Wrap the base model with LoRA adapters.
model = FastLanguageModel.get_peft_model(
    model,
    r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",

                      "embed_tokens", "lm_head",], # Add for continual pretraining
    lora_alpha = 32,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = True,   # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.9: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: NVIDIA H100 80GB HBM3. Max memory: 79.209 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.0+cu121. CUDA = 9.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.27.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Unsloth: Offloading input_embeddings to disk to save VRAM


  offloaded_W = torch.load(filename, map_location = "cpu", mmap = True)


Unsloth: Offloading output_embeddings to disk to save VRAM


Unsloth 2024.9 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


Unsloth: Casting embed_tokens to float32
Unsloth: Casting lm_head to float32


In [10]:

# ----------------------------- #
# Part 6: Create Chunks (After Tokenizer is Loaded)
# ----------------------------- #

chunks = create_chunks(discourse_units, tokenizer, max_length=2048, overlap_size=100)

# Save chunks to a file (Optional)
# Write `chunks` (not `discourse_units`) so the file matches its name, and use
# UTF-8 explicitly to match how the source text was read.
with open('/root/quantumLeap/data/psychologoy-of-unconscious-mind/chunks_final.txt', 'w', encoding='utf-8') as f:
    for chunk in chunks:
        f.write(chunk + '\n')

# If you need to reload from file (Optional)
# with open('/root/quantumLeap/data/psychologoy-of-unconscious-mind/chunks_final.txt', 'r', encoding='utf-8') as f:
#     chunks = f.read().splitlines()

len(chunks)

99

In [20]:
# Read the value from the tokenizer directly: the EOS_TOKEN name is only assigned
# in a later cell, so referencing it here fails on Restart & Run All.
tokenizer.eos_token

'<|eot_id|>'

In [11]:
# ----------------------------- #
# Part 7: Create and Tokenize Dataset
# ----------------------------- #

# Create a Dataset object from chunks

book_title = 'Psychology of the Unconscious by C. G. Jung'
# Template wrapped around every chunk: first slot is the title, second the chunk text.
wikipedia_prompt = """
Psychology Book

### Title: {}

### Article: {}
"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """
    Wraps every text of a batch in `wikipedia_prompt` and appends the EOS token.

    Parameters:
        examples (dict): Batch with a "text" entry holding a list of strings.

    Returns:
        dict: {"text": [...]} with one formatted string per input text.
    """
    outputs = []
    # Iterate over the batch itself. Pairing the texts with
    # [book_title] * len(chunks) tied the result to the global `chunks` list and
    # silently dropped rows whenever a batch was longer than it.
    for text in examples["text"]:
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        outputs.append(wikipedia_prompt.format(book_title, text) + EOS_TOKEN)
    return { "text" : outputs, }

# convert chunks variable to huggingface dataset

from datasets import Dataset

dataset = Dataset.from_dict({"text": chunks})

# Keep 90% of the chunks for training; the remaining 10% is discarded here.
# The split shuffles, so fix the seed (same value as the trainer seed) to get the
# same training subset on every run.
dataset = dataset.train_test_split(train_size = 0.90, seed = 3407)["train"]

dataset = dataset.map(formatting_prompts_func, batched = True,)

dataset


Map:   0%|          | 0/89 [00:00<?, ? examples/s]

Dataset({
    features: ['text'],
    num_rows: 89
})

In [12]:
# ----------------------------- #
# Part 8: Configure Training Arguments
# ----------------------------- #


from transformers import TrainingArguments
from unsloth import UnslothTrainer, UnslothTrainingArguments, is_bfloat16_supported

# Continued-pretraining trainer over the formatted "text" column.
trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=UnslothTrainingArguments(
        # 2 sequences per step x 8 accumulation steps per optimizer update.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,
        # Step-based schedule; for longer runs switch to
        # warmup_ratio = 0.1 / num_train_epochs = 1 instead.
        max_steps=120,
        warmup_steps=10,
        # Select a 2 to 10x smaller learning rate for the embedding matrices!
        learning_rate=5e-5,
        embedding_learning_rate=1e-5,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

Map (num_proc=2):   0%|          | 0/89 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [13]:
# ----------------------------- #
# Part 9: Define Compute Metrics Function
# ----------------------------- #

def compute_metrics(eval_pred):
    """
    Computes perplexity from model predictions and labels.

    Parameters:
        eval_pred: A (logits, labels) pair. `logits` must be 3-dimensional with
            its first two dimensions equal to the shape of `labels`; the last
            dimension is treated as the number of classes (vocabulary size).
            Label positions equal to -100 are ignored.

    Returns:
        dict: {"perplexity": float}, the exponential of the mean next-token
        cross-entropy.

    Raises:
        ValueError: If the shapes disagree or a label is not smaller than the
            number of classes in `logits`.
    """
    logits, labels = eval_pred
    # as_tensor accepts arrays and tensors alike without the copy (and warning)
    # torch.tensor() produces for tensor input; the casts give CrossEntropyLoss
    # the dtypes it requires.
    logits = torch.as_tensor(logits).float()
    labels = torch.as_tensor(labels).long()

    # Ensure shapes match
    if logits.shape[:2] != labels.shape:
        raise ValueError(f"Logits shape {logits.shape} does not match labels shape {labels.shape}")

    # Shift so that the logits at position t are scored against the label at t + 1
    shift_logits = logits[:, :-1, :].contiguous()
    shift_labels = labels[:, 1:].contiguous()

    # Check label values against the class dimension of the logits themselves:
    # that is the bound the loss enforces, and it keeps this function independent
    # of the global `model`.
    vocab_size = shift_logits.size(-1)
    if shift_labels.numel() and shift_labels.max() >= vocab_size:
        raise ValueError(f"Label value {shift_labels.max()} exceeds vocab size {vocab_size}")

    loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-100, reduction='mean')
    loss = loss_fct(shift_logits.view(-1, vocab_size), shift_labels.view(-1))
    perplexity = torch.exp(loss).item()
    return {"perplexity": perplexity}

In [None]:
# ----------------------------- #
# Part 10: Initialize the Trainer
# ----------------------------- #

logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    level=logging.INFO,  # Set to DEBUG for more detailed logs
)
logger = logging.getLogger(__name__)

# This plain-Trainer setup needs `training_args`, `train_dataset` and
# `eval_dataset`, none of which are defined by the cells above. Building it
# unconditionally raises NameError on Restart & Run All and would replace the
# trainer configured in Part 8, so only build it once those names exist.
_required_names = ("training_args", "train_dataset", "eval_dataset")
_missing_names = [name for name in _required_names if name not in globals()]

if _missing_names:
    logger.warning(
        "Skipping plain Trainer setup; undefined: %s. Keeping the existing trainer.",
        ", ".join(_missing_names),
    )
else:
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tokenizer=tokenizer,
        data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
        compute_metrics=compute_metrics,
    )


In [24]:

from transformers import TextStreamer

# Prompt template with three slots: concept name, detailed explanation, response.
instruction_prompt = """Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: {}
detailed_explanation: {}

### Response:
{}"""

# Enable native 2x faster inference
FastLanguageModel.for_inference(model)

# Tokenize a single prompt; the response slot stays empty so the model fills it in.
inputs = tokenizer(
    [
        instruction_prompt.format(
            "Hero Archetype",
            "The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.",
            "",
        )
    ],
    return_tensors="pt",
).to("cuda")

# Short, non-streamed generation (at most 64 new tokens).
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
tokenizer.batch_decode(outputs)

# Streamed generation: tokens are printed as they are produced.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=4096)

# inputs = tokenizer(
# [
#     instruction_prompt.format(
#         "Hero Archetype", # concept_name
#         "The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.", # detailed_explanation
#         "", # output - leave this blank for generation!
#     )
# ], return_tensors = "pt").to("cuda")



# from transformers import TextStreamer
# text_streamer = TextStreamer(tokenizer)
# _ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 256,
#                    repetition_penalty = 0.1)

<|begin_of_text|>Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: Hero Archetype
detailed_explanation: The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.

### Response:
To illustrate the hero archetype concept, consider the story of Odysseus from Homer's epic poem, The Odyssey. Odysseus, the legendary king of Ithaca, exemplifies the hero archetype as he navigates his journey home after the Trojan War. Throughout his journey, Odysseus faces numerous challenges and adversaries, including the mythical creatures of the sea, the Sirens, and the wrath of the sea god, Poseidon.

His

In [25]:
# ----------------------------- #
# Part 11: Start Training
# ----------------------------- #

#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
# Baseline: peak memory reserved so far, in GB.
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

# The preceding cell switched the model to Unsloth's inference mode via
# FastLanguageModel.for_inference(model); switch it back before training.
FastLanguageModel.for_training(model)
trainer_stats = trainer.train()
#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
# Memory attributed to training = peak after training minus the baseline above.
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")



GPU = NVIDIA H100 80GB HBM3. Max memory = 79.209 GB.
10.83 GB of memory reserved.


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 89 | Num Epochs = 24
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 8
\        /    Total batch size = 16 | Total steps = 120
 "-____-"     Number of trainable parameters = 1,386,217,472


Unsloth: Setting lr = 1.00e-05 instead of 5.00e-05 for embed_tokens.
Unsloth: Setting lr = 1.00e-05 instead of 5.00e-05 for lm_head.


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
1,2.839
2,2.9428
3,3.0378
4,3.2716
5,2.9339
6,2.9564
7,3.0536
8,3.1648
9,2.8145
10,3.1103


415.6931 seconds used for training.
6.93 minutes used for training.
Peak reserved memory = 38.414 GB.
Peak reserved memory for training = 27.584 GB.
Peak reserved memory % of max memory = 48.497 %.
Peak reserved memory for training % of max memory = 34.824 %.


# The loss from the earlier training run was too high, so the next cell reuses the training arguments from the Unsloth Colab notebook "Llama-3.1 8b + Unsloth 2x faster finetuning.ipynb" (URL below).
### https://colab.research.google.com/drive/1Ys44kVvmeZtnICzWz0xgpRnrIOjZAuxp?usp=sharing#scrollTo=95_Nn-89DhsL

In [31]:
# # delete previous trainer
# del trainer

#@title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
# max_memory_reserved() is a running peak, so after an earlier training run it
# still reports that run's peak and the "memory for training" figure below ends
# up near zero. Reset the peak counters so this run is measured on its own.
torch.cuda.reset_peak_memory_stats()
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")


from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# An earlier inference cell leaves the model in Unsloth's inference mode
# (FastLanguageModel.for_inference); switch it back before training again.
FastLanguageModel.for_training(model)

# NOTE: `model` already carries the adapter weights updated by the previous run,
# so this continues training rather than starting from the freshly loaded model.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

trainer_stats = trainer.train()
#@title Show final memory and time stats

used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
# Memory attributed to training = peak after training minus the baseline above.
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory         /max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")



from transformers import TextStreamer

# Prompt template with three slots: concept name, detailed explanation, response.
instruction_prompt = """Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: {}
detailed_explanation: {}

### Response:
{}"""

# Enable native 2x faster inference
FastLanguageModel.for_inference(model)

# Tokenize a single prompt; the response slot stays empty so the model fills it in.
inputs = tokenizer(
    [
        instruction_prompt.format(
            "Hero Archetype",
            "The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.",
            "",
        )
    ],
    return_tensors="pt",
).to("cuda")

# Short, non-streamed generation (at most 64 new tokens).
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
tokenizer.batch_decode(outputs)

# Streamed generation: tokens are printed as they are produced.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=4096)

# inputs = tokenizer(
# [
#     instruction_prompt.format(
#         "Hero Archetype", # concept_name
#         "The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.", # detailed_explanation
#         "", # output - leave this blank for generation!
#     )
# ], return_tensors = "pt").to("cuda")



# from transformers import TextStreamer
# text_streamer = TextStreamer(tokenizer)
# _ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 256,
#                    repetition_penalty = 0.1)

GPU = NVIDIA H100 80GB HBM3. Max memory = 79.209 GB.
38.414 GB of memory reserved.


Map (num_proc=2):   0%|          | 0/89 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 89 | Num Epochs = 6
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 1,386,217,472


Step,Training Loss
1,2.8859
2,2.7922
3,3.0063
4,2.8792
5,2.8403
6,3.2353
7,3.571
8,2.9722
9,2.9955
10,2.8723


107.8053 seconds used for training.
1.8 minutes used for training.
Peak reserved memory = 38.764 GB.
Peak reserved memory for training = 0.35 GB.
Peak reserved memory % of max memory = 48.939 %.
Peak reserved memory for training % of max memory = 0.442 %.
<|begin_of_text|>Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: Hero Archetype
detailed_explanation: The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.

### Response:
Let us take the example of the American cowboy, who, with his horse and lasso, is a familiar figure in the popular imagination, and consider how he may serve

KeyboardInterrupt: 

In [29]:
import os
import time

# Take ONE timestamp for every artifact of this run. Calling int(time.time())
# separately per statement can straddle a second boundary and scatter the model
# and tokenizer across different directories.
run_id = int(time.time())
local_dir = f"qLeap_model_base_v0_{run_id}"
# Model and tokenizer must land in the same Hub repo; previously the tokenizer
# was pushed under a different name than the model. The model's repo name is
# kept so it lines up with the repos already published.
hub_repo = f"olabs-ai/qLeap_model_base_v0_base_{run_id}"

# SECURITY: never hard-code the access token. It is read from the HF_TOKEN
# environment variable (None when it is unset). The token previously embedded
# here has been exposed and should be revoked.
hf_token = os.environ.get("HF_TOKEN")

model.save_pretrained(local_dir) # Local saving
tokenizer.save_pretrained(local_dir)
model.push_to_hub(hub_repo, token = hf_token) # Online saving
tokenizer.push_to_hub(hub_repo, token = hf_token) # Online saving

# # Merge to 16bit
# if False: model.save_pretrained_merged(f"qLeap_model_v0_16bit_merged_{int(time.time())}", tokenizer, save_method = "merged_16bit",)
# if False: model.push_to_hub_merged(f"olabs-ai/qLeap_model_v0_16bit_merged_{int(time.time())}", tokenizer, save_method = "merged_16bit", token = os.environ.get("HF_TOKEN"))

# # Merge to 4bit
# if False: model.save_pretrained_merged(f"qLeap_model_v0_4bit_merged_{int(time.time())}", tokenizer, save_method = "merged_4bit",)
# if False: model.push_to_hub_merged(f"olabs-ai/qLeap_model_v0_4bit_merged_{int(time.time())}", tokenizer, save_method = "merged_4bit", token = os.environ.get("HF_TOKEN"))

# # Just LoRA adapters
# if False: model.save_pretrained_merged(f"qLeap_model_v0_LoRA_merged_{int(time.time())}", tokenizer, save_method = "lora",)
# if False: model.push_to_hub_merged(f"olabs-ai/qLeap_model_LoRA_merged_{int(time.time())}", tokenizer, save_method = "lora", token = os.environ.get("HF_TOKEN"))


# # Save to 8bit Q8_0
# if False: model.save_pretrained_gguf(f"qLeap_model_v0_8bit_Q8_{int(time.time())}", tokenizer,)
# if False: model.push_to_hub_gguf(f"olabs-ai/qLeap_model_v0_8bit_Q8_{int(time.time())}", tokenizer, token = os.environ.get("HF_TOKEN"))

# # Save to 16bit GGUF
# if False: model.save_pretrained_gguf(f"qLeap_model_v0_16bit_GGUF_{int(time.time())}", tokenizer, quantization_method = "f16")
# if False: model.push_to_hub_gguf(f"olabs-ai/qLeap_model_v0_16bit_GGUF_{int(time.time())}", tokenizer, quantization_method = "f16", token = os.environ.get("HF_TOKEN"))

# # Save to q4_k_m GGUF
# if False: model.save_pretrained_gguf(f"qLeap_model_v0_q4_k_m_16bit_{int(time.time())}", tokenizer, quantization_method = "q4_k_m")
# if False: model.push_to_hub_gguf(f"olabs-ai/qLeap_model_v0_q4_k_m_16bit_{int(time.time())}", tokenizer, quantization_method = "q4_k_m", token = os.environ.get("HF_TOKEN"))
# if False: model.push_to_hub_gguf(f"olabs-ai/qLeap_model_v0_q5_k_m_16bit_{int(time.time())}", tokenizer, quantization_method = "q5_k_m", token = os.environ.get("HF_TOKEN"))

adapter_model.safetensors:   0%|          | 0.00/5.54G [00:00<?, ?B/s]

Saved model to https://huggingface.co/olabs-ai/qLeap_model_base_v0_base_1726654817


In [28]:

from transformers import TextStreamer

# Prompt template with three slots: concept name, detailed explanation, response.
instruction_prompt = """Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: {}
detailed_explanation: {}

### Response:
{}"""

# Enable native 2x faster inference
FastLanguageModel.for_inference(model)

# Tokenize a single prompt; the response slot stays empty so the model fills it in.
inputs = tokenizer(
    [
        instruction_prompt.format(
            "Hero Archetype",
            "The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.",
            "",
        )
    ],
    return_tensors="pt",
).to("cuda")

# Short, non-streamed generation (at most 64 new tokens).
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
tokenizer.batch_decode(outputs)

# Streamed generation: tokens are printed as they are produced.
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer=text_streamer, max_new_tokens=4096)

# inputs = tokenizer(
# [
#     instruction_prompt.format(
#         "Hero Archetype", # concept_name
#         "The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.", # detailed_explanation
#         "", # output - leave this blank for generation!
#     )
# ], return_tensors = "pt").to("cuda")



# from transformers import TextStreamer
# text_streamer = TextStreamer(tokenizer)
# _ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 256,
#                    repetition_penalty = 0.1)

<|begin_of_text|>Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: Hero Archetype
detailed_explanation: The hero archetype is a common motif in literature and folklore, representing a protagonist who embodies bravery, resilience, and a quest for a greater purpose.

### Response:
Here's an example scenario that illustrates the hero archetype:

In a small village nestled in the mountains, a young woman named Ava is known for her exceptional bravery and kindness. When a devastating earthquake hits the village, Ava decides to embark on a perilous journey to find aid and rescue those trapped under the rubble.

As she ventures into the unknown, Ava faces numerous challenges, including treac

  f"   \\\   /|    GPU: {gpu_stats.name}. Max memory: {max_memory} GB. Platform = {platform_system}.\n"\
  f"O^O/ \_/ \\    Pytorch: {torch.__version__}. CUDA = {gpu_stats.major}.{gpu_stats.minor}. CUDA Toolkit = {torch.version.cuda}.\n"\
  f"\        /    Bfloat16 = {str(SUPPORTS_BFLOAT16).upper()}. FA [Xformers = {xformers_version}. FA2 = {HAS_FLASH_ATTENTION}]\n"\
  start = re.search('logger\.info\([\"\'].+?Running training', inner_training_loop).span(0)[0]
  spaces = re.search('\n([\s\t]{1,})', original_debug).group(0)[1:]
  front_spaces = re.match('([\s\t]{1,})', inner_training_loop).group(0)


KeyboardInterrupt: 

# Instruction Tuning

In [None]:

# Instruction fine-tuning - create an instruction_prompt based on the concept_examples.csv file

import json
import ast
import logging

import csv

# newline='' is what the csv module documents for files handed to a reader, so
# newlines embedded in quoted fields are parsed correctly.
# NOTE(review): UTF-8 is assumed here, matching how the book text is read - confirm
# against the actual CSV export.
with open('/root/quantumLeap/data/psychologoy-of-unconscious-mind/concept_examples.csv', 'r', newline='', encoding='utf-8') as f:
    reader = csv.DictReader(f)
    data = list(reader)


# Configure logging
# force=True replaces any handlers already installed on the root logger. Without
# it basicConfig() does nothing once logging has been configured (as an earlier
# cell of this notebook does), and transformation errors would never reach the
# log file.
logging.basicConfig(
    filename='transformation_errors.log',
    filemode='w',
    level=logging.ERROR,
    format='%(levelname)s:%(message)s',
    force=True,
)

# Sample original data
original_data = data

def _parse_scenario_list(raw):
    """Parse a serialized list of scenarios: JSON first, Python-literal syntax second.

    Parameters:
        raw (str): Text such as '["a", "b"]' or "['a', 'b']".

    Returns:
        list: The parsed list.

    Raises:
        ValueError: If the text parses to something that is not a list.
        Any exception raised by ast.literal_eval on malformed input
        (ValueError, SyntaxError, TypeError, MemoryError, RecursionError).
    """
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        # Not valid JSON (e.g. single-quoted strings); fall back to Python literals.
        parsed = ast.literal_eval(raw)

    # Checked after both parsers so that a valid-JSON non-list (a dict, a bare
    # string, a number) is rejected the same way as a non-list Python literal.
    if not isinstance(parsed, list):
        raise ValueError("Parsed 'example_scenario' is not a list.")
    return parsed


def transform_data(original_data):
    """
    Transforms the original data by expanding 'example_scenario' into separate dictionaries.

    Entries that cannot be used are logged with logging.error and skipped, never
    raised: missing/blank required fields, an 'example_scenario' that does not
    parse to a list, and individual scenarios that are not strings or are blank.

    Parameters:
        original_data (list): List of dictionaries with 'concept_name', 'detailed_explanation',
            and 'example_scenario' (a JSON or Python-literal list of strings, serialized as text).
            Values may be None, as csv.DictReader produces for short rows.

    Returns:
        new_data (list): Transformed list with one 'example_scenario' per dictionary;
            all three values are whitespace-stripped strings.
    """
    new_data = []

    for idx, entry in enumerate(original_data, start=1):
        # `or ''` covers keys that are present but None (short CSV rows),
        # which a plain .get(key, '') default does not.
        concept_name = (entry.get('concept_name') or '').strip()
        detailed_explanation = (entry.get('detailed_explanation') or '').strip()
        example_scenario_str = (entry.get('example_scenario') or '').strip()

        if not (concept_name and detailed_explanation and example_scenario_str):
            logging.error(f"Entry {idx} is missing required fields. Skipping.")
            continue

        try:
            example_scenarios = _parse_scenario_list(example_scenario_str)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as e:
            logging.error(f"Entry {idx} ('{concept_name}') has invalid 'example_scenario': {e}")
            continue

        # Iterate through each scenario and create a new entry
        for scenario_idx, scenario in enumerate(example_scenarios, start=1):
            if not isinstance(scenario, str):
                logging.error(f"Entry {idx} ('{concept_name}') has non-string scenario at position {scenario_idx}. Skipping this scenario.")
                continue

            scenario_text = scenario.strip()
            if not scenario_text:
                # A blank scenario would become a training example with an empty response.
                logging.error(f"Entry {idx} ('{concept_name}') has empty scenario at position {scenario_idx}. Skipping this scenario.")
                continue

            new_data.append({
                'concept_name': concept_name,
                'detailed_explanation': detailed_explanation,
                'example_scenario': scenario_text,
            })

    return new_data

# Expand every loaded row into one record per example scenario.
transformed_data = transform_data(original_data)

# Optional: keep a pretty-printed UTF-8 JSON copy of the expanded records on disk.
with open('/root/quantumLeap/data/psychologoy-of-unconscious-mind/transformed_data.json', 'w', encoding='utf-8') as f:
    f.write(json.dumps(transformed_data, ensure_ascii=False, indent=4))

# Summary line, pointer to the error log, then the bare record count.
print(
    f"Transformation complete. {len(transformed_data)} entries created.",
    "Check 'transformation_errors.log' for any errors encountered during transformation.",
    len(transformed_data),
    sep="\n",
)

In [None]:

# Prompt template for instruction tuning. It has three `{}` slots that
# str.format fills positionally, in this order: the concept name, its detailed
# explanation, and the text that follows "### Response:".
instruction_prompt = """Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: {}
detailed_explanation: {}

### Response:
{}"""


# End-of-sequence token string taken from `tokenizer`, which is defined outside this cell.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

def instruction_prompt_func(examples):
    """Render the training text for ``Dataset.map``.

    Fills ``instruction_prompt`` with each example's concept name, detailed
    explanation and example scenario, and appends ``EOS_TOKEN`` so every
    training text ends with the end-of-sequence marker.

    Parameters:
        examples (dict): Either a batch (each of 'concept_name',
            'detailed_explanation', 'example_scenario' maps to a list of
            strings, as passed by ``map(..., batched=True)``) or a single
            example (each key maps to one string).

    Returns:
        dict: ``{"text": [...]}`` with one rendered string per example for a
        batch, or ``{"text": "..."}`` for a single example.
    """
    concept_names = examples["concept_name"]
    detailed_explanations = examples["detailed_explanation"]
    example_scenarios = examples["example_scenario"]

    # Non-batched map passes plain strings; zipping them would iterate characters.
    if isinstance(concept_names, str):
        text = instruction_prompt.format(concept_names, detailed_explanations, example_scenarios) + EOS_TOKEN
        return { "text" : text, }

    # Batched map passes parallel lists: render one text per row.
    texts = [
        instruction_prompt.format(name, explanation, scenario) + EOS_TOKEN
        for name, explanation, scenario in zip(concept_names, detailed_explanations, example_scenarios)
    ]
    return { "text" : texts, }


# Convert transformed_data to a Hugging Face dataset.
# transformed_data is a list with one dict per example, which is the input
# Dataset.from_list takes; Dataset.from_dict expects a single dict mapping
# column names to lists and fails on a list.
instruction_dataset = Dataset.from_list(transformed_data)
# Add the rendered "text" column that the trainer reads.
instruction_dataset = instruction_dataset.map(instruction_prompt_func, batched = True,)

from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from unsloth import UnslothTrainer, UnslothTrainingArguments

# Build the Unsloth trainer over the instruction dataset, then run training.
trainer = UnslothTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=instruction_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=8,
    args=UnslothTrainingArguments(
        # Where outputs go, seeding, and logging cadence.
        output_dir="outputs",
        seed=3407,
        logging_steps=1,

        # Batch shape: 2 examples per device, accumulated over 8 steps.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,

        # Step budget and schedule.
        # For longer runs use num_train_epochs (e.g. 1) and warmup_ratio (e.g. 0.1) instead.
        max_steps=120,
        warmup_steps=10,
        lr_scheduler_type="linear",

        # Optimizer. The embedding matrices get a 2 to 10x smaller learning rate.
        optim="adamw_8bit",
        learning_rate=5e-5,
        embedding_learning_rate=1e-5,
        weight_decay=0.00,

        # Mixed precision: bf16 when supported, fp16 otherwise.
        bf16=is_bfloat16_supported(),
        fp16=not is_bfloat16_supported(),
    ),
)
trainer_stats = trainer.train()
#@title Show final memory and time stats
# Peak reserved CUDA memory reported by torch, converted from bytes to GB.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
# Difference from start_gpu_memory (defined outside this cell; presumably the
# amount reserved before training started - TODO confirm).
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
# Both figures as a percentage of max_memory (defined outside this cell).
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

# One line per statistic, runtime first.
print(
    f"{trainer_stats.metrics['train_runtime']} seconds used for training.",
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
    sep="\n",
)

In [None]:

# Same template as used for training: three positional `{}` slots
# (concept_name, detailed_explanation, response).
instruction_prompt = """Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: {}
detailed_explanation: {}

### Response:
{}"""

FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# All three slots must be supplied: with only two arguments str.format raises
# IndexError on the third `{}`. The response slot stays empty so the model
# generates it.
# NOTE(review): the request below is free-form text placed in the concept_name
# slot with no explanation; a prompt that mirrors the training format (a
# concept name plus its detailed explanation) is likely to work better.
inputs = tokenizer(
[
    instruction_prompt.format(
        "Give an example scenario that illustrates the concept of Hero archetype as described by Jungian psychology.", # concept_name
        "", # detailed_explanation
        "", # response - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
# Only the last expression of a cell is displayed, so print the decoded text explicitly.
print(tokenizer.batch_decode(outputs))

# Text Streaming

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

inputs = tokenizer(
[
    instruction_prompt.format(
        "When trying to understand how nature plays a role in the development of a child's personality, which concept should be considered?", # concept_name
        "", # detailed_explanation
        "", # response - leave this blank for generation!
    ),
], return_tensors = "pt").to("cuda")


text_streamer = TextStreamer(tokenizer)
# repetition_penalty: 1.0 is neutral and values above 1.0 discourage repeated
# tokens. Values below 1.0 (such as 0.1) reward repetition and make the output
# loop, so a mild penalty is used here.
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128,
                   repetition_penalty = 1.1)

In [None]:
import os
import time

# Add the current timestamp to the model name. It is computed once so the model
# and the tokenizer end up in the same local directory and the same Hub repo;
# calling time.time() per statement can yield different names for each artifact.
run_tag = int(time.time())
model_name = f"qLeap_model_instruct_v0_{run_tag}"
hub_repo = f"olabs-ai/{model_name}"

# SECURITY: never hardcode the Hugging Face token in the notebook. It is read
# from the HF_TOKEN environment variable instead. Any token that was previously
# committed in this cell must be treated as leaked and revoked.
# NOTE(review): when HF_TOKEN is unset this is None; push_to_hub is then
# expected to fall back to the credentials cached by `huggingface-cli login` - confirm.
hf_token = os.environ.get("HF_TOKEN")

model.save_pretrained(model_name) # Local saving
tokenizer.save_pretrained(model_name)
model.push_to_hub(hub_repo, token = hf_token) # Online saving
tokenizer.push_to_hub(hub_repo, token = hf_token) # Online saving

# The templates below are disabled alternatives. They are f-strings so that
# {run_tag} is interpolated, and they reuse hf_token rather than a literal token.

# # Merge to 16bit
# if False: model.save_pretrained_merged(f"qLeap_model_v0_16bit_merged_{run_tag}", tokenizer, save_method = "merged_16bit",)
# if False: model.push_to_hub_merged(f"olabs-ai/qLeap_model_v0_16bit_merged_{run_tag}", tokenizer, save_method = "merged_16bit", token = hf_token)

# # Merge to 4bit
# if False: model.save_pretrained_merged(f"qLeap_model_v0_4bit_merged_{run_tag}", tokenizer, save_method = "merged_4bit",)
# if False: model.push_to_hub_merged(f"olabs-ai/qLeap_model_v0_4bit_merged_{run_tag}", tokenizer, save_method = "merged_4bit", token = hf_token)

# # Just LoRA adapters
# if False: model.save_pretrained_merged(f"qLeap_model_v0_LoRA_merged_{run_tag}", tokenizer, save_method = "lora",)
# if False: model.push_to_hub_merged(f"olabs-ai/qLeap_model_v0_LoRA_merged_{run_tag}", tokenizer, save_method = "lora", token = hf_token)


# # Save to 8bit Q8_0
# if False: model.save_pretrained_gguf(f"qLeap_model_v0_8bit_Q8_{run_tag}", tokenizer,)
# if False: model.push_to_hub_gguf(f"olabs-ai/qLeap_model_v0_8bit_Q8_{run_tag}", tokenizer, token = hf_token)

# # Save to 16bit GGUF
# if False: model.save_pretrained_gguf(f"qLeap_model_v0_16bit_GGUF_{run_tag}", tokenizer, quantization_method = "f16")
# if False: model.push_to_hub_gguf(f"olabs-ai/qLeap_model_v0_16bit_GGUF_{run_tag}", tokenizer, quantization_method = "f16", token = hf_token)

# # Save to q4_k_m GGUF
# if False: model.save_pretrained_gguf(f"qLeap_model_v0_q4_k_m_16bit_{run_tag}", tokenizer, quantization_method = "q4_k_m")
# if False: model.push_to_hub_gguf(f"olabs-ai/qLeap_model_v0_q4_k_m_16bit_{run_tag}", tokenizer, quantization_method = "q4_k_m", token = hf_token)
# if False: model.push_to_hub_gguf(f"olabs-ai/qLeap_model_v0_q5_k_m_16bit_{run_tag}", tokenizer, quantization_method = "q5_k_m", token = hf_token)

# Inference

In [None]:

# Same template as used for training: three positional `{}` slots
# (concept_name, detailed_explanation, response).
instruction_prompt = """Below is an instruction that describes a concept in the field of psychology, sociology, anthropology, ethnography, or qualitative research or cultural studies. Write a response that appropriately completes the request.

### Instruction: Given the concept and its detailed explanation, provide an example scenario that illustrates the concept.
concept_name: {}
detailed_explanation: {}

### Response:
{}"""

FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# All three slots must be supplied: with only two arguments str.format raises
# IndexError on the third `{}`. The response slot stays empty so the model
# generates it.
# NOTE(review): the request below is free-form text placed in the concept_name
# slot with no explanation; a prompt that mirrors the training format (a
# concept name plus its detailed explanation) is likely to work better.
inputs = tokenizer(
[
    instruction_prompt.format(
        "Give an example scenario that illustrates the concept of Hero archetype as described by Jungian psychology.", # concept_name
        "", # detailed_explanation
        "", # response - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
# Only the last expression of a cell is displayed, so print the decoded text explicitly.
print(tokenizer.batch_decode(outputs))

# Text Streaming

from transformers import TextStreamer
text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128)

inputs = tokenizer(
[
    instruction_prompt.format(
        "When trying to understand how nature plays a role in the development of a child's personality, which concept should be considered?", # concept_name
        "", # detailed_explanation
        "", # response - leave this blank for generation!
    ),
], return_tensors = "pt").to("cuda")


text_streamer = TextStreamer(tokenizer)
# repetition_penalty: 1.0 is neutral and values above 1.0 discourage repeated
# tokens. Values below 1.0 (such as 0.1) reward repetition and make the output
# loop, so a mild penalty is used here.
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 128,
                   repetition_penalty = 1.1)