# Cell 1: Import and setup

In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import Dataset
import torch

print("🚀 Starting Hello World Fine-Tuning!")


🚀 Starting Hello World Fine-Tuning!


# Cell 2: Load a TINY model (not even TinyLlama - something smaller for quick testing)

In [3]:
model_name = "facebook/opt-350m"  # Even smaller than TinyLlama for speed
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Add padding token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(f"✅ Loaded model: {model_name}")
print(f"📊 Model parameters: {model.num_parameters():,}")

✅ Loaded model: facebook/opt-350m
📊 Model parameters: 331,196,416


# Cell 3: Create SUPER simple training data
# Goal: Teach the model to complete crypto-related sentences

In [4]:
training_data = [
    "When Bitcoin price goes up, the market sentiment is bullish.",
    "If a project has a rug pull, investors will lose money.",
    "High trading volume usually indicates strong market interest.",
    "When fear dominates the market, prices tend to drop.",
    "A successful token launch often leads to price appreciation.",
    "Security audits are important for smart contract safety.",
    "Market capitalization reflects the total value of a cryptocurrency.",
    "Liquidity pools enable decentralized trading on DEXs.",
    "Whale movements can significantly impact token prices.",
    "Technical analysis helps predict short-term price movements."
]

# Convert to dataset format
dataset = Dataset.from_dict({"text": training_data})
print(f"📈 Training samples: {len(dataset)}")

📈 Training samples: 10


# Cell 4: Tokenize the data

In [5]:
def tokenize_function(examples):
    return tokenizer(
        examples["text"],
        truncation=True,
        padding=True,
        max_length=128,  # Keep it short for quick training
    )

tokenized_dataset = dataset.map(tokenize_function, batched=True)
print("✅ Data tokenized!")

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

✅ Data tokenized!


# Cell 5: Set up training arguments (MINIMAL for quick testing)

In [6]:
training_args = TrainingArguments(
    output_dir="./models/fine_tuned/hello_world",
    num_train_epochs=3,           # Very short training
    per_device_train_batch_size=2, # Small batches
    logging_steps=1,              # See progress immediately
    save_steps=10,
    remove_unused_columns=False,
)

    Found GPU0 NVIDIA GeForce GTX 1050 which is of cuda capability 6.1.
    Minimum and Maximum cuda capability supported by this version of PyTorch is
    (7.0) - (12.0)
    
    Please install PyTorch with a following CUDA
    configurations:  12.6 following instructions at
    https://pytorch.org/get-started/locally/
    
NVIDIA GeForce GTX 1050 with CUDA capability sm_61 is not compatible with the current PyTorch installation.
The current PyTorch install supports CUDA capabilities sm_70 sm_75 sm_80 sm_86 sm_90 sm_100 sm_120.
If you want to use the NVIDIA GeForce GTX 1050 GPU with PyTorch, please check the instructions at https://pytorch.org/get-started/locally/



# Cell 6: Create trainer

In [7]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

# Cell 7: Train! (This should take 1-3 minutes)

In [8]:
print("🏋️ Starting training...")
trainer.train()
print("🎉 Training completed!")

🏋️ Starting training...


AcceleratorError: CUDA error: no kernel image is available for execution on the device
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


# Cell 8: Test the fine-tuned model

In [9]:
test_prompts = [
    "When Ethereum price goes up,",
    "If a project has good fundamentals,"
]

print("\n🧪 Testing fine-tuned model:")
for prompt in test_prompts:
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(
        inputs.input_ids,
        max_length=20,
        num_return_sequences=1,
        temperature=0.7
    )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Prompt: {prompt}")
    print(f"Completion: {generated_text}")
    print("-" * 50)

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



🧪 Testing fine-tuned model:




RuntimeError: Expected all tensors to be on the same device, but got index is on cpu, different from other tensors on cuda:0 (when checking argument in method wrapper_CUDA__index_select)