# 🧠 LLM Interpretability - Simple Pipeline

Train neural networks to interpret and modify other neural networks' weights.


In [None]:
#@title üöÄ Setup & Install Dependencies

# Clone repository and install
!git clone https://github.com/maximus-powers/llm-interpretability.git
# !git pull
%cd llm-interpretability/training_data

# Install dependencies
!pip install -q -r requirements.txt
!pip install -q accelerate datasets transformers[torch] peft tensorboard huggingface_hub

# Check GPU
import torch
if torch.cuda.is_available():
    print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")
    print(f"üìä VRAM: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.1f} GB")
else:
    print("‚ö†Ô∏è  Using CPU (will be slower)")

print("‚úÖ Setup complete!")

Cloning into 'llm-interpretability'...
remote: Enumerating objects: 23, done.[K
remote: Counting objects: 100% (23/23), done.[K
remote: Compressing objects: 100% (19/19), done.[K
remote: Total 23 (delta 4), reused 23 (delta 4), pack-reused 0 (from 0)[K
Receiving objects: 100% (23/23), 41.77 KiB | 638.00 KiB/s, done.
Resolving deltas: 100% (4/4), done.
/content/llm-interpretability/training_data
[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
#@title ‚öôÔ∏è Dataset Generation Configuration
# Each `#@param` line is a Colab form field; edit the values from the form UI
# or directly in the code.

NUM_EXAMPLES = 10000 #@param {type:"integer"}
MIN_DEGRADATION = 0.08 #@param {type:"number"}
HUB_USERNAME = "maximuspowers" #@param {type:"string"}
DATASET_NAME = "llm-interpretability-v2" #@param {type:"string"}
PRIVATE_DATASET = False #@param {type:"boolean"}

# Hub repository id in "<username>/<dataset>" form.
HUB_DATASET_NAME = "{}/{}".format(HUB_USERNAME, DATASET_NAME)

# Echo the effective settings. The time estimate is 2 minutes per full batch
# of 50 examples (integer division, so partial batches are not counted).
print(
    "üìã Configuration:",
    "   Examples: {}".format(NUM_EXAMPLES),
    "   Min degradation: {}".format(MIN_DEGRADATION),
    "   Hub dataset: {}".format(HUB_DATASET_NAME),
    "   Private: {}".format(PRIVATE_DATASET),
    "   Estimated time: ~{:.1f} minutes".format(NUM_EXAMPLES // 50 * 2),
    sep="\n",
)

In [None]:
#@title üîê HuggingFace Login

HF_TOKEN = "" #@param {type:"string"}

import os

from huggingface_hub import login

# A value typed into the form above is written into this cell's source and is
# saved with the notebook, so prefer leaving the field empty and exporting the
# token as the HF_TOKEN environment variable instead. Surrounding whitespace
# (a common copy/paste artifact) is stripped in both cases.
HF_TOKEN = HF_TOKEN.strip() or os.environ.get("HF_TOKEN", "").strip()

if HF_TOKEN:
    login(token=HF_TOKEN)
    print("‚úÖ Logged in to HuggingFace Hub")
else:
    print("‚ö†Ô∏è  Please enter your HuggingFace token above")
    print("   (or set the HF_TOKEN environment variable and re-run this cell)")
    print("   Get token from: https://huggingface.co/settings/tokens")
    print("   Make sure it has 'Write' permissions")

‚úÖ Logged in to HuggingFace Hub


In [None]:
#@title üè≠ Generate Dataset

import subprocess
import sys
import time

print(f"üè≠ Starting dataset generation...")
print(f"‚è±Ô∏è  Estimated time: {NUM_EXAMPLES // 50 * 2:.1f} minutes")

start_time = time.time()

# Build command.
# sys.executable guarantees the script runs under the same interpreter (and
# therefore the same installed packages) as this kernel; "-u" disables the
# child's stdout buffering so its log lines really do arrive in real time.
cmd = [
    sys.executable, "-u", "dataset_generation_pipeline.py",
    "--num_examples", str(NUM_EXAMPLES),
    "--dataset_name", "local_dataset",
    "--min_degradation", str(MIN_DEGRADATION),
    "--incremental_save",  # Enable incremental saving every 1000 records
    "--checkpoint_interval", "1000",  # Save every 1000 records
    "--hub_dataset_name", HUB_DATASET_NAME,
    "--hub_token", HF_TOKEN,
    "--verbose"
]

if PRIVATE_DATASET:
    cmd.append("--private")

# Echo the command with the token value masked: cell output is stored in the
# notebook, so printing the raw command would leak the credential.
token_index = cmd.index("--hub_token") + 1
printable_cmd = cmd[:token_index] + ["***"] + cmd[token_index + 1:]
print("Running:", " ".join(printable_cmd))
print("-" * 60)

# Stream the child's combined stdout/stderr line by line. The context manager
# closes the pipe and reaps the process even if the cell is interrupted.
with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                      text=True, bufsize=1) as process:
    for line in process.stdout:
        print(line.rstrip())

    # Wait for completion
    return_code = process.wait()

generation_time = time.time() - start_time
print("-" * 60)
print(f"‚è±Ô∏è  Completed in {generation_time/60:.1f} minutes")

if return_code == 0:
    print("‚úÖ Dataset generation successful!")
    print(f"ü§ó Dataset URL: https://huggingface.co/datasets/{HUB_DATASET_NAME}")
else:
    print(f"‚ùå Generation failed (return code: {return_code})")

In [None]:
#@title üëÄ Preview Dataset

import json
from datasets import load_dataset

try:
    # Fetch the dataset from the Hub by its "<user>/<name>" id.
    dataset = load_dataset(HUB_DATASET_NAME)

    # Split sizes and the column layout of the train split.
    print(f"üìä Dataset Info:")
    for split_label, split_key in (("Train", "train"), ("Validation", "validation")):
        print(f"   {split_label} examples: {len(dataset[split_key])}")
    print(f"   Columns: {dataset['train'].column_names}")

    # First training row; its 'metadata' column is parsed as a JSON string.
    example = dataset['train'][0]
    metadata = json.loads(example['metadata'])

    print(f"\nüìù Example Row:")
    print(f"   Corrupted pattern: {metadata.get('corrupted_pattern', 'unknown')}")
    # Numeric metadata fields, shown with four decimals (0 when a key is absent).
    for metric_label, metric_key in (
        ("Clean accuracy", "clean_accuracy"),
        ("Noisy accuracy", "noisy_accuracy"),
        ("Degradation", "accuracy_diff"),
    ):
        print(f"   {metric_label}: {metadata.get(metric_key, 0):.4f}")
    for text_label, text_key in (("Prompt", "prompt"), ("Completion", "completion")):
        print(f"   {text_label} length: {len(example[text_key])} chars")

    # Truncated excerpts of the prompt and completion, framed by 60-dash rules.
    for text_label, text_key, char_limit in (
        ("Prompt", "prompt", 500),
        ("Completion", "completion", 300),
    ):
        print(f"\nüìÑ Sample {text_label} (first {char_limit} chars):")
        print("-" * 60)
        print(example[text_key][:char_limit] + "...")
        print("-" * 60)

except Exception as e:
    # Best-effort preview: report the problem instead of aborting the notebook.
    print(f"‚ùå Could not load dataset: {e}")
    print("   Make sure dataset generation completed successfully")

In [None]:
# ====================================================================
# StarCoder2-7B Fine-tuning on Google Colab - Simplified Approach
# Using the official repository and accelerate launch
# ====================================================================

# Clone the official StarCoder2 repository
!git clone https://github.com/bigcode-project/starcoder2.git
%cd starcoder2

# Install dependencies from requirements.txt plus additional packages
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q git+https://github.com/huggingface/transformers.git  # Install from source for StarCoder2 support
!pip install -q accelerate>=0.21.0
!pip install -q peft>=0.4.0
!pip install -q trl>=0.4.7
!pip install -q bitsandbytes>=0.40.2
!pip install -q datasets>=2.17.1
!pip install -q wandb>=0.16.3

# Login to Hugging Face (required for model upload)
from huggingface_hub import notebook_login
print("Please log in to Hugging Face to upload your fine-tuned model:")
notebook_login()

# Optional: Login to wandb for experiment tracking
!wandb login

# Set your Hugging Face token as environment variable
import os
# os.environ["HF_TOKEN"] = "your_hf_token_here"  # Uncomment and add your token if needed

# ====================================================================
# CONFIGURATION - MODIFY THESE SETTINGS
# ====================================================================

# --- Hub identity -----------------------------------------------------------
HF_USERNAME = "YOUR_USERNAME"            # your Hugging Face username (placeholder)
MODEL_NAME = "starcoder2-7b-finetuned"   # name for the fine-tuned model

# --- Training data ----------------------------------------------------------
# NOTE: this rebinds DATASET_NAME, which the dataset-generation form earlier in
# the notebook also defines.
DATASET_NAME = "your-dataset-name"       # dataset to fine-tune on (placeholder)
DATASET_SUBSET = "data/python"           # subset of that dataset
DATASET_TEXT_FIELD = "content"           # column holding the code/text

# --- Sequence length and schedule -------------------------------------------
MAX_SEQ_LENGTH = 1024
MAX_STEPS = 1_000                        # adjust to the dataset size
WARMUP_STEPS = 100
LEARNING_RATE = 0.0002

# --- Batching and data loading ----------------------------------------------
MICRO_BATCH_SIZE = 1                     # keep at 1 for a Colab T4 GPU
GRADIENT_ACCUMULATION_STEPS = 8
NUM_PROC = 2                             # processes used for data loading

# ====================================================================
# LAUNCH TRAINING
# ====================================================================

print("üöÄ Starting StarCoder2-7B fine-tuning with accelerate launch...")

# Use accelerate launch to run the official finetune.py script
!accelerate launch finetune.py \
    --model_id "bigcode/starcoder2-7b" \
    --dataset_name "{DATASET_NAME}" \
    --subset "{DATASET_SUBSET}" \
    --dataset_text_field "{DATASET_TEXT_FIELD}" \
    --split "train" \
    --max_seq_length {MAX_SEQ_LENGTH} \
    --max_steps {MAX_STEPS} \
    --micro_batch_size {MICRO_BATCH_SIZE} \
    --gradient_accumulation_steps {GRADIENT_ACCUMULATION_STEPS} \
    --learning_rate {LEARNING_RATE} \
    --warmup_steps {WARMUP_STEPS} \
    --num_proc {NUM_PROC} \
    --output_dir "starcoder2_7b_finetuned" \
    --push_to_hub True

print("‚úÖ Training completed!")

# ====================================================================
# UPLOAD TO HUGGING FACE HUB (if not done automatically)
# ====================================================================

# finetune.py was launched with --push_to_hub True, so the upload is expected
# to have happened already; these lines only point at the profile page to check.
print(
    "üöÄ If the model wasn't automatically uploaded, you can manually upload it:",
    "Check your model at: https://huggingface.co/{}".format(HF_USERNAME),
    sep="\n",
)

# ====================================================================
# QUICK INFERENCE TEST
# ====================================================================

print("üß™ Testing the fine-tuned model...")

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base checkpoint, plus the adapter directory under the training output dir
# (presumably where finetune.py writes its final LoRA weights; confirm).
base_model_id = "bigcode/starcoder2-7b"
adapter_path = "./starcoder2_7b_finetuned/final_checkpoint"

try:
    # Tokenizer comes from the base checkpoint; reuse EOS as the pad token
    # when none is defined.
    tokenizer = AutoTokenizer.from_pretrained(base_model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # Base weights in bfloat16, with automatic device placement.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_id, torch_dtype=torch.bfloat16, device_map="auto"
    )

    # Attach the LoRA adapter and fold it into the base weights.
    model = PeftModel.from_pretrained(base_model, adapter_path).merge_and_unload()

    # Prompt used for the smoke test, tokenized onto the model's device.
    test_prompt = "def fibonacci(n):"
    inputs = tokenizer(test_prompt, return_tensors="pt").to(model.device)

    # Sampling settings for a short completion.
    with torch.no_grad():
        generation_kwargs = dict(
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
        outputs = model.generate(**inputs, **generation_kwargs)

    # Decode the first (only) sequence and show it next to the prompt.
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(
        "üìù Test generation:",
        f"Input: {test_prompt}",
        f"Output:\n{generated_text}",
        sep="\n",
    )

except Exception as e:
    # Best-effort check: a failure here is reported but does not stop the cell.
    print(f"‚ùå Error during inference test: {e}")
    print("The model should still be saved and uploaded to HF Hub if training completed successfully.")

print("\n‚ú® Fine-tuning process complete! ‚ú®")
