# Cell 1: Import and setup

In [None]:
# CRITICAL: GTX 1050 CUDA Compatibility Setup
#
# Run this cell FIRST in a fresh kernel, before any cell imports PyTorch.
# Changes made through os.environ only affect the current kernel process and
# are discarded by a kernel restart, so restarting *after* this cell would
# undo it. A restart is only needed when PyTorch was already imported in this
# kernel, and this cell must then be re-run before anything else.

import os
import sys

# Values that keep PyTorch away from the GPU for this kernel session.
cpu_only_env = {
    "CUDA_VISIBLE_DEVICES": "",      # Hide all CUDA devices completely
    "TORCH_USE_CUDA_DSA": "0",       # Disable CUDA device-side assertions
    "CUDA_LAUNCH_BLOCKING": "1",     # Make CUDA calls synchronous
    # NOTE(review): intended to limit CUDA allocator caching; this value is
    # presumably only read if the CUDA allocator is ever initialised, and a
    # size of 0 may be rejected at that point - confirm against PyTorch docs.
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:0",
}

# The variables only help if they are in place before PyTorch touches CUDA,
# so remember whether torch had already been loaded when this cell ran.
torch_already_imported = "torch" in sys.modules

# Set environment variables BEFORE any PyTorch imports
os.environ.update(cpu_only_env)

print("🚨 CRITICAL STEP FOR GTX 1050:")
print("=" * 50)
print("1. ✅ Environment variables are now set for this kernel session")
if torch_already_imported:
    print("2. 🔄 PyTorch was already imported: Kernel → Restart Kernel, then re-run THIS cell first")
else:
    print("2. ✅ PyTorch has not been imported yet - no kernel restart needed")
print("3. ▶️  Run the next cell to continue")
print("4. ⚠️  These settings are lost on kernel restart - re-run this cell after every restart!")
print("=" * 50)

print("\n🔧 Environment configured:")
for key in cpu_only_env:
    print(f"   {key} = '{os.environ.get(key, 'NOT_SET')}'")

print("\n💡 These settings will force CPU-only operation and avoid GTX 1050 CUDA kernel issues.")

In [None]:
# STEP 3: Load Model with Explicit CPU-Only Configuration

# Imported here as well so this cell does not raise NameError when it is
# executed before the cell that performs the notebook-wide imports.
# Re-importing an already loaded module is a no-op.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "facebook/opt-350m"
print(f"📥 Loading {model_name} with CPU-only configuration...")

# Load tokenizer
print("   Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load model with explicit CPU device mapping and no CUDA optimizations
print("   Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,      # Use float32 for CPU
    device_map=None,                # No device mapping (defaults to CPU)
    low_cpu_mem_usage=False,        # Disable to avoid any CUDA optimizations
)

# Explicitly move model to CPU and verify
model = model.to('cpu')
device = torch.device('cpu')

# Configure tokenizer: padding needs a pad token, so fall back to EOS
# when the checkpoint does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("✅ Model loaded successfully!")
print(f"🖥️  Model device: {next(model.parameters()).device}")
print(f"📊 Model parameters: {model.num_parameters():,}")
print(f"🔧 Model dtype: {next(model.parameters()).dtype}")

# Verify no tensors are on CUDA: collect every offending parameter once,
# report each one, then summarise.
cuda_params = [
    (name, param.device)
    for name, param in model.named_parameters()
    if param.device.type == 'cuda'
]
for name, param_device in cuda_params:
    print(f"   ⚠️  Found CUDA tensor: {name} on {param_device}")
cuda_tensors = len(cuda_params)

if cuda_tensors == 0:
    print("✅ All model parameters confirmed on CPU")
else:
    print(f"❌ Found {cuda_tensors} parameters on CUDA - this will cause issues")

# Cell 2: Load a TINY model (not even TinyLlama - something smaller for quick testing)

In [None]:
# STEP 2: Clean CPU-Only Setup (run AFTER kernel restart)
#
# A kernel restart clears any os.environ changes made by earlier cells, so
# this cell does not trust the environment: it checks every CPU-only
# variable individually and (re)applies the ones that are missing or that
# carry a different value, all before PyTorch is imported.

import os

# The values this notebook relies on for CPU-only operation.
required_env = {
    "CUDA_VISIBLE_DEVICES": "",
    "TORCH_USE_CUDA_DSA": "0",
    "CUDA_LAUNCH_BLOCKING": "1",
    "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:0",
}

# Verify environment variables are set
print("🔍 Checking environment variables:")
env_vars = {key: os.environ.get(key, "NOT_SET") for key in required_env}

for key, value in env_vars.items():
    # Only an exact match counts: e.g. CUDA_VISIBLE_DEVICES="0" is "set"
    # but would leave the GPU visible.
    status = "✅" if value == required_env[key] else "❌"
    print(f"   {status} {key} = '{value}'")

# Apply every variable that is missing or wrong (fallback)
stale_keys = [
    key for key, wanted in required_env.items()
    if os.environ.get(key) != wanted
]
if stale_keys:
    print("\n⚠️  Environment variables missing or not CPU-only - setting them now:")
    for key in stale_keys:
        os.environ[key] = required_env[key]
        print(f"   {key} = '{required_env[key]}'")
    print("   Environment variables set as fallback")

# Now import PyTorch with CPU-only configuration
print("\n📦 Importing PyTorch with CPU-only configuration...")
import torch
import torch.nn as nn

# Verify PyTorch sees no CUDA devices
print(f"🔍 CUDA available: {torch.cuda.is_available()}")
print(f"🔍 CUDA device count: {torch.cuda.device_count()}")
if torch.cuda.is_available():
    # Happens when CUDA was initialised in this kernel before the variables
    # above were in place; only a fresh kernel fixes that.
    print("⚠️  CUDA is still visible to PyTorch - restart the kernel and run this cell before any other PyTorch code")

# Force CPU device globally
torch.set_default_device('cpu')
device = torch.device('cpu')
print(f"✅ Default device set to: {device}")

# Import transformers and other libraries
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import Dataset

print("📚 All libraries imported successfully")
print("🎯 Ready for CPU-only fine-tuning!")

# Cell 3: Create SUPER simple training data
# Goal: Teach the model to complete crypto-related sentences

In [None]:
# The complete fine-tuning corpus: ten short, crypto-themed sentences.
training_data = [
    "When Bitcoin price goes up, the market sentiment is bullish.",
    "If a project has a rug pull, investors will lose money.",
    "High trading volume usually indicates strong market interest.",
    "When fear dominates the market, prices tend to drop.",
    "A successful token launch often leads to price appreciation.",
    "Security audits are important for smart contract safety.",
    "Market capitalization reflects the total value of a cryptocurrency.",
    "Liquidity pools enable decentralized trading on DEXs.",
    "Whale movements can significantly impact token prices.",
    "Technical analysis helps predict short-term price movements.",
]

# Wrap the sentences in a single-column ("text") dataset, one row each.
text_column = {"text": training_data}
dataset = Dataset.from_dict(text_column)

sample_count = len(dataset)
print(f"📈 Training samples: {sample_count}")

# Cell 4: Tokenize the data

In [None]:
def tokenize_function(examples):
    """Tokenize a batch of rows and attach causal-LM training labels.

    Args:
        examples: A batch from ``dataset.map(..., batched=True)``; its
            ``"text"`` entry is a list of strings.

    Returns:
        The tokenizer output (``input_ids`` and ``attention_mask``, padded
        to the longest sequence in the batch and truncated to 128 tokens)
        plus a ``labels`` entry. Labels mirror ``input_ids`` except that
        padded positions are set to -100 so they are ignored by the loss.
        Without ``labels`` the model returns no loss and training cannot
        run.
    """
    encoded = tokenizer(
        examples["text"],
        truncation=True,
        padding=True,
        max_length=128,  # Keep it short for quick training
    )
    # Mask by attention_mask rather than by pad token id: the pad token may
    # be the same id as EOS, and real EOS tokens must still be learned.
    encoded["labels"] = [
        [token_id if attended else -100 for token_id, attended in zip(ids, mask)]
        for ids, mask in zip(encoded["input_ids"], encoded["attention_mask"])
    ]
    return encoded

tokenized_dataset = dataset.map(tokenize_function, batched=True)
print("✅ Data tokenized!")

# Cell 5: Set up training arguments (MINIMAL for quick testing)

In [None]:
# STEP 5: CPU-Only Training Arguments (Anti-CUDA Configuration)

print("🔧 Configuring training arguments for strict CPU-only operation...")

# Disable Accelerate library CUDA detection
import accelerate
from accelerate import Accelerator

# Force Accelerator to use CPU only
accelerator = Accelerator(cpu=True)

training_args = TrainingArguments(
    output_dir="./models/fine_tuned/hello_world",
    num_train_epochs=2,           # Reduced for CPU training
    per_device_train_batch_size=1, # Small batch for CPU
    gradient_accumulation_steps=2, # Compensate for smaller batch size
    logging_steps=1,              # See progress immediately
    save_steps=50,                # Less frequent saves

    # CPU-ONLY SETTINGS - CRITICAL FOR GTX 1050
    remove_unused_columns=False,
    fp16=False,                   # No mixed precision on CPU
    bf16=False,                   # No bfloat16 on CPU
    dataloader_pin_memory=False,  # No GPU memory pinning
    report_to=[],                 # Disable wandb/tensorboard logging
    # use_cpu is the supported switch for CPU-only training; the deprecated
    # no_cuda flag is redundant alongside it and is therefore not passed.
    use_cpu=True,                 # Force CPU usage

    # DISABLE ACCELERATE OPTIMIZATIONS
    disable_tqdm=False,           # Keep progress bars
    dataloader_num_workers=0,     # No multiprocessing (can cause CUDA issues)
    skip_memory_metrics=True,     # Skip GPU memory metrics

    # FORCE CPU DEVICE
    local_rank=-1,                # No distributed training
    # NOTE: `device` is a read-only property derived from the flags above,
    # not a constructor argument; passing device='cpu' raises TypeError.
)

print("✅ Training arguments configured:")
print(f"   📊 Epochs: {training_args.num_train_epochs}")
print(f"   🔢 Batch size: {training_args.per_device_train_batch_size}")
print(f"   🚫 CUDA disabled: {training_args.use_cpu}")
print(f"   🖥️  CPU enforced: {getattr(training_args, 'use_cpu', 'Not set')}")
print(f"   💾 FP16 disabled: {not training_args.fp16}")
print("   ⚡ Accelerator configured for CPU-only operation")

# Cell 6: Create trainer

In [None]:
# STEP 6: Create CPU-Only Trainer with Device Verification

print("🎯 Creating trainer with comprehensive device verification...")

# Final device verification before trainer creation
print("\n🔍 Pre-trainer device verification:")
print(f"   🖥️  Model device: {next(model.parameters()).device}")
print(f"   📊 PyTorch default device: {torch.tensor(1.0).device}")
print(f"   🚫 CUDA available: {torch.cuda.is_available()}")
print(f"   📱 CUDA devices: {torch.cuda.device_count()}")

# Ensure all data will be on CPU by checking tokenized dataset
sample_batch = tokenized_dataset[0]
if 'input_ids' in sample_batch:
    sample_tensor = torch.tensor(sample_batch['input_ids'])
    print(f"   📝 Sample data device: {sample_tensor.device}")

# Create trainer with explicit CPU-only configuration
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
)

print("\n✅ Trainer created successfully!")

# Post-creation verification
print("\n🔍 Post-trainer device verification:")
print(f"   🖥️  Trainer model device: {trainer.model.device}")
print(f"   📊 Training dataset size: {len(trainer.train_dataset)}")

# Check if trainer is using accelerate and its configuration
if hasattr(trainer, 'accelerator'):
    accelerator_device = trainer.accelerator.device
    print(f"   ⚡ Accelerator device: {accelerator_device}")
    # CPU mode is derived from the resolved device instead of reading a
    # `cpu` attribute, which the Accelerator object is not documented to have.
    print(f"   🚫 Accelerator CPU mode: {accelerator_device.type == 'cpu'}")
else:
    print("   ⚡ No accelerator detected")

# Final safety check - scan for any CUDA tensors
print("\n🛡️  Final safety check...")
cuda_param_names = [
    name for name, param in model.named_parameters()
    if param.device.type == 'cuda'
]
for name in cuda_param_names:
    print(f"   ❌ CUDA tensor found: {name}")
cuda_found = bool(cuda_param_names)

if not cuda_found:
    print("   ✅ All tensors confirmed on CPU")
    print("   🎯 Ready for safe CPU training!")
else:
    print("   ⚠️  CUDA tensors detected - training may fail")

print("\n🚀 Trainer ready for CPU-only training on GTX 1050 system!")

# Cell 7: Train! (This should take 1-3 minutes)

# Cell 8: Test the fine-tuned model

In [None]:
# STEP 7: Execute CPU Training

print("🏋️ Starting CPU training...")
print("⏰ This will take a few minutes on CPU - please be patient")

# One final verification before training
print("\n📊 Pre-training status:")
print(f"   Model device: {next(trainer.model.parameters()).device}")
print(f"   CUDA available: {torch.cuda.is_available()}")
print(f"   Training epochs: {trainer.args.num_train_epochs}")
print(f"   Batch size: {trainer.args.per_device_train_batch_size}")

try:
    # Start training
    trainer.train()

    # Training alone does not persist the final weights: with the configured
    # save_steps=50 and only 10 samples this short run never reaches a
    # checkpoint step. Save model and tokenizer explicitly so the message
    # below is true and the result can be reloaded later.
    output_dir = trainer.args.output_dir
    trainer.save_model(output_dir)
    tokenizer.save_pretrained(output_dir)

    print("\n🎉 Training completed successfully!")
    print(f"💾 Model saved to: {output_dir}")
    print("✅ GTX 1050 compatibility issue resolved!")

except Exception as e:
    print(f"\n❌ Training failed with error: {e}")
    print("📋 Error type:", type(e).__name__)

    # If it's still a CUDA error, provide guidance
    if "CUDA" in str(e):
        print("\n🔍 CUDA error detected even with CPU-only setup.")
        print("💡 Suggested solutions:")
        print("   1. Ensure you restarted the kernel after Step 1")
        print("   2. Check that CUDA_VISIBLE_DEVICES is empty")
        print("   3. Try completely restarting Jupyter")
        print(f"   4. Current CUDA_VISIBLE_DEVICES: '{os.environ.get('CUDA_VISIBLE_DEVICES', 'NOT_SET')}'")

    # Bare raise re-raises the active exception with its original traceback.
    raise

In [None]:
# STEP 8: Test the Fine-Tuned Model (CPU-Only)

print("🧪 Testing fine-tuned model with CPU-only inference...")

# Verify model is still on CPU after training
print(f"🔍 Model device verification: {next(model.parameters()).device}")

# Training leaves the model in train mode; switch to eval mode so dropout
# does not perturb the sampled completions.
model.eval()

test_prompts = [
    "When Ethereum price goes up,",
    "If a project has good fundamentals,",
    "High trading volume indicates"
]

print(f"\n🎯 Running {len(test_prompts)} test completions:")

# Count failures so the closing summary reflects what actually happened.
failed_generations = 0

for i, prompt in enumerate(test_prompts, 1):
    print(f"\n--- Test {i}/{len(test_prompts)} ---")

    # Tokenize input (ensure it's on CPU)
    inputs = tokenizer(prompt, return_tensors="pt")

    # Verify input tensors are on CPU
    input_device = inputs.input_ids.device
    print(f"Input device: {input_device}")

    if input_device.type == 'cuda':
        print("⚠️  Input on CUDA - moving to CPU")
        # Keep the tokenizer's batch object (attribute access is used below)
        # instead of replacing it with a plain dict.
        inputs = inputs.to('cpu')

    # Generate text with CPU-optimized settings
    try:
        with torch.no_grad():  # Save memory during inference
            # generate() runs on the device of the model and its inputs; it
            # has no `device` argument and rejects unknown keyword arguments.
            outputs = model.generate(
                inputs.input_ids,
                attention_mask=inputs.attention_mask,
                max_length=30,
                num_return_sequences=1,
                temperature=0.7,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_p=0.9,
            )

        # Decode result
        generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"✅ Prompt: {prompt}")
        print(f"📝 Completion: {generated_text}")

    except Exception as e:
        # Best-effort: report the failure and continue with the next prompt.
        failed_generations += 1
        print(f"❌ Generation failed: {e}")
        if "CUDA" in str(e):
            print("   Still encountering CUDA issues during inference")

    print("-" * 60)

print("\n🎉 Testing completed!")
if failed_generations == 0:
    print("✅ Your fine-tuned model is working on CPU")
    print("🖥️  GTX 1050 compatibility achieved!")
    print("\n💡 The model is now specialized for crypto-related text generation")
else:
    print(f"❌ {failed_generations} of {len(test_prompts)} generations failed - see the errors above")