In [15]:
pip install transformers datasets peft accelerate --upgrade


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting transformers
  Downloading transformers-4.53.2-py3-none-any.whl.metadata (40 kB)
Downloading transformers-4.53.2-py3-none-any.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.53.1
    Uninstalling transformers-4.53.1:
      Successfully uninstalled transformers-4.53.1
Successfully installed transformers-4.53.2
Note: you may need to restart the kernel to use updated packages.


In [7]:
# ▶️ 1. INSTALL & IMPORTS
!pip install 'accelerate>=0.26.0' datasets transformers peft

import os
import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    Trainer,
    TrainingArguments,
)
from peft import get_peft_model, LoraConfig, TaskType

# ▶️ 2. CONFIG
DATA_PATH     = "stock_bot_data.jsonl"    # your JSONL
OUTPUT_DIR    = "ft-stock-bot"
BASE_MODEL    = "gpt2-medium"
MAX_LENGTH    = 256
BATCH_SIZE    = 4
NUM_EPOCHS    = 3
LEARNING_RATE = 3e-5
EVAL_STEPS    = 200
LOGGING_STEPS = 50
SAVE_TOTAL    = 2

# ▶️ 3. DEVICE DETECTION
device = (
    torch.device("cuda")
    if torch.cuda.is_available()
    else torch.device("mps")
    if torch.backends.mps.is_available()
    else torch.device("cpu")
)
print(f"Using device: {device}")

# ▶️ 4. LOAD & SPLIT YOUR DATA
dataset = load_dataset("json", data_files=DATA_PATH)
dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)

# ▶️ 5. TOKENIZER & MODEL SETUP
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
if tokenizer.pad_token is None:
    tokenizer.add_special_tokens({"pad_token": tokenizer.eos_token})

model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
model.resize_token_embeddings(len(tokenizer))

# ─ PEFT/LoRA
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
)
model = get_peft_model(model, peft_config)

# move model to device
model.to(device)

# ▶️ 6. TOKENIZATION FUNCTION
def tokenize_fn(batch):
    texts = [
        p.strip()
        + tokenizer.eos_token
        + c.strip()
        + tokenizer.eos_token
        for p, c in zip(batch["prompt"], batch["completion"])
    ]
    enc = tokenizer(
        texts,
        max_length=MAX_LENGTH,
        truncation=True,
        padding="max_length",
    )
    enc["labels"] = enc["input_ids"].copy()
    return enc

tokenized = dataset.map(
    tokenize_fn, batched=True, remove_columns=dataset["train"].column_names
)

# ▶️ 7. TRAINING ARGUMENTS (no fp16 on MPS/CPU)
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    do_eval=True,
    eval_steps=EVAL_STEPS,
    logging_steps=LOGGING_STEPS,
    save_total_limit=SAVE_TOTAL,
    gradient_accumulation_steps=1,
    logging_dir=os.path.join(OUTPUT_DIR, "logs"),
    fp16=torch.cuda.is_available(),    # only true if CUDA is available
    no_cuda=not (torch.cuda.is_available() or torch.backends.mps.is_available()),
    use_mps_device=torch.backends.mps.is_available(),
)

# ▶️ 8. TRAINER & TRAIN
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    tokenizer=tokenizer,
)
trainer.train()

# ▶️ 9. SAVE
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)
print(f"\n✅ Fine-tuning complete! Saved in '{OUTPUT_DIR}/'")


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Using device: mps


Map:   0%|          | 0/1800 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

  trainer = Trainer(


ValueError: fp16 mixed precision requires a GPU (not 'mps').

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained("ft-stock-bot")
tokenizer = AutoTokenizer.from_pretrained("ft-stock-bot")
prompt = "Analyze the 14-week RSI for AAPL and advise on action."
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
print(tokenizer.decode(model.generate(input_ids, max_new_tokens=50)[0]))
