# Cell 1: Import and setup

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import Dataset
import torch

print("🚀 Starting Hello World Fine-Tuning!")

# Pick the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"🔥 Using device: {device}")
if device.type == "cuda":
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"📱 CUDA Device: {torch.cuda.get_device_name(0)}")
    print(f"🧠 CUDA Memory: {gpu_props.total_memory / 1024**3:.1f} GB")

model_name = "facebook/opt-350m"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Keep the weights in float32. The training arguments enable fp16 mixed
# precision on CUDA, and the AMP gradient scaler refuses to unscale gradients
# of parameters that are themselves float16 ("Attempting to unscale FP16
# gradients"), so loading the model in half precision here would make
# trainer.train() fail. Mixed precision is handled by the Trainer instead.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
)
model = model.to(device)

# Reuse the end-of-sequence token for padding when the tokenizer defines none.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print(f"✅ Loaded model on: {model.device}")
print(f"📊 Model parameters: {model.num_parameters():,}")

In [None]:
# NOTE: this cell repeats the imports and start-up banner of Cell 1 verbatim.
# Re-running it is harmless, but it prints the banner a second time.
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import Dataset
import torch

print("🚀 Starting Hello World Fine-Tuning!")


# Cell 2: Load a TINY model (not even TinyLlama - something smaller for quick testing)

In [None]:
# facebook/opt-350m is picked for speed: it is even smaller than TinyLlama.
model_name = "facebook/opt-350m"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Fall back to the end-of-sequence token when no padding token is defined.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Report which checkpoint was loaded and how large it is.
print(f"✅ Loaded model: {model_name}")
parameter_count = model.num_parameters()
print(f"📊 Model parameters: {parameter_count:,}")

# Cell 3: Create SUPER simple training data
# Goal: Teach the model to complete crypto-related sentences

In [None]:
# Ten one-sentence crypto statements that make up the whole fine-tuning corpus.
training_data = [
    "When Bitcoin price goes up, the market sentiment is bullish.",
    "If a project has a rug pull, investors will lose money.",
    "High trading volume usually indicates strong market interest.",
    "When fear dominates the market, prices tend to drop.",
    "A successful token launch often leads to price appreciation.",
    "Security audits are important for smart contract safety.",
    "Market capitalization reflects the total value of a cryptocurrency.",
    "Liquidity pools enable decentralized trading on DEXs.",
    "Whale movements can significantly impact token prices.",
    "Technical analysis helps predict short-term price movements.",
]

# Wrap the sentences in a dataset with a single "text" column.
text_column = {"text": training_data}
dataset = Dataset.from_dict(text_column)
sample_count = len(dataset)
print(f"📈 Training samples: {sample_count}")

# Cell 4: Tokenize the data

In [None]:
def tokenize_function(examples):
    """Tokenize a batch of sentences and attach causal-LM training labels.

    Args:
        examples: Batch mapping supplied by ``dataset.map(..., batched=True)``;
            ``examples["text"]`` holds the sentences to encode.

    Returns:
        The tokenizer output with an added ``labels`` entry. Labels mirror
        ``input_ids``, except that padded positions (attention mask 0) are
        set to -100 so the loss ignores them.
    """
    encoded = tokenizer(
        examples["text"],
        truncation=True,
        padding=True,
        max_length=128,  # Keep it short for quick training
    )
    # A causal LM only returns a loss when it receives labels; without this
    # column the Trainer has nothing to optimise and aborts. The attention
    # mask (rather than the pad token id) decides what to ignore, so this
    # stays correct when the pad token is the same as the EOS token.
    encoded["labels"] = [
        [token_id if is_real else -100 for token_id, is_real in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded


tokenized_dataset = dataset.map(tokenize_function, batched=True)
print("✅ Data tokenized!")

# Cell 5: Set up training arguments (MINIMAL for quick testing)

In [None]:
# Minimal hyperparameters for a quick smoke-test run, collected in one mapping
# and expanded into TrainingArguments.
training_config = {
    "output_dir": "./models/fine_tuned/hello_world",
    "num_train_epochs": 3,             # Very short training
    "per_device_train_batch_size": 1,  # Reduced for GTX 1050 Mobile memory
    "gradient_accumulation_steps": 2,  # Compensate for smaller batch size
    "logging_steps": 1,                # See progress immediately
    "save_steps": 10,
    "remove_unused_columns": False,
    "fp16": device.type == "cuda",     # Enable half precision if using CUDA
    "dataloader_pin_memory": False,    # Reduce memory usage
    "report_to": [],                   # Disable wandb/tensorboard logging
}
training_args = TrainingArguments(**training_config)

# Cell 6: Create trainer

In [None]:
# Bundle the model, the hyperparameters and the tokenized samples into the
# Trainer that drives the fine-tuning loop.
trainer_inputs = {
    "model": model,
    "args": training_args,
    "train_dataset": tokenized_dataset,
}
trainer = Trainer(**trainer_inputs)

# Cell 7: Train! (This should take 1-3 minutes)

# (Cell 7, continued: the training cell below retries on the CPU if a CUDA "kernel image" error occurs.)

In [None]:
import dataclasses

print("🏋️ Starting training...")

try:
    # Run one tiny matmul on the GPU first so that a PyTorch build that does
    # not support this GPU fails here, before the training loop starts.
    if device.type == "cuda":
        test_tensor = torch.randn(10, 10).to(device)
        _ = test_tensor @ test_tensor.T  # Simple matrix multiplication test
        print("✅ CUDA compatibility test passed")

    trainer.train()
    print("🎉 Training completed!")

except RuntimeError as e:
    error_text = str(e)
    if "CUDA" not in error_text or "kernel image" not in error_text:
        # Anything other than the known kernel-image incompatibility is
        # reported and propagated with its original traceback.
        print(f"❌ Unexpected error during training: {e}")
        raise

    print("⚠️  CUDA compatibility issue detected (likely GTX 1050 compute capability)")
    print("🔄 Switching to CPU training...")

    # Move the model to the CPU and make sure its weights are float32, since
    # the CPU run below does not use fp16. Rebinding `device` makes later
    # cells place their tensors on the CPU as well.
    model = model.cpu().float()
    device = torch.device("cpu")

    # TrainingArguments still selects CUDA whenever it is available, and the
    # Trainer would then move the model straight back to the GPU. The CPU has
    # to be requested explicitly: `use_cpu` is the current flag, `no_cuda` its
    # predecessor in older transformers releases, so use whichever one this
    # installation defines.
    supported_fields = {field.name for field in dataclasses.fields(TrainingArguments)}
    cpu_only_flag = "use_cpu" if "use_cpu" in supported_fields else "no_cuda"

    # Same hyperparameters as the GPU run, minus half precision.
    training_args_cpu = TrainingArguments(
        output_dir="./models/fine_tuned/hello_world",
        num_train_epochs=3,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=2,
        logging_steps=1,
        save_steps=10,
        remove_unused_columns=False,
        fp16=False,  # Disable FP16 for CPU
        dataloader_pin_memory=False,
        report_to=[],
        **{cpu_only_flag: True},
    )

    # Create new trainer with CPU settings
    trainer_cpu = Trainer(
        model=model,
        args=training_args_cpu,
        train_dataset=tokenized_dataset,
    )

    print("🔄 Starting CPU training...")
    trainer_cpu.train()
    print("🎉 CPU Training completed!")

    # Update trainer reference for later cells
    trainer = trainer_cpu

In [None]:
# Cell 8: Test the fine-tuned model
test_prompts = [
    "When Ethereum price goes up,",
    "If a project has good fundamentals,",
    "High trading volume indicates"
]

# Training leaves the model in train mode; switch to eval mode so dropout is
# disabled while sampling completions.
model.eval()

print("\n🧪 Testing fine-tuned model:")
for prompt in test_prompts:
    # Place the inputs on whichever device the model currently lives on, so
    # this cell works after GPU training and after the CPU fallback alike.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Pass the whole encoding so generate() receives the attention mask along
    # with the input ids instead of having to guess it.
    outputs = model.generate(
        **inputs,
        max_length=30,  # total length, prompt tokens included
        num_return_sequences=1,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True
    )

    # The decoded text contains the prompt followed by the sampled continuation.
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"Prompt: {prompt}")
    print(f"Completion: {generated_text}")
    print("-" * 50)