# 🚀 Hybrid Mamba-xLSTM: Google Colab Setup

Complete setup and training guide for Google Colab

## Step 1: Check GPU & Install Dependencies

In [None]:
# Check GPU availability before installing anything: the later cells call
# `.cuda()` and will fail on a CPU-only runtime.
import torch

gpu_available = torch.cuda.is_available()
print(f"GPU Available: {gpu_available}")
if gpu_available:
    # Report device 0 only; `total_memory` is in bytes, so / 1e9 gives decimal GB.
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
else:
    print("⚠️ No GPU available. Please enable GPU in Runtime settings!")

In [None]:
# Clone and install the project and test it out 
!git clone https://github.com/krishankb-de/hybrid_model_mamba_xlstm.git
%cd hybrid_model_mamba_xlstm
!pip install -e . -q

## Step 2: Mount Google Drive (Optional, for saving checkpoints)

In [None]:
# Mount Google Drive at /content/drive so results can be copied there after
# training. This step is optional and only works inside Google Colab.
from google.colab import drive

drive.mount('/content/drive')
print("✓ Google Drive mounted!")

## Step 3: Quick Test (2 minutes)

In [None]:
# Quick inference test with 70M model (completes in < 2 minutes)
python_script = """
import torch
from transformers import AutoTokenizer
import sys
sys.path.insert(0, '/content/hybrid_model_mamba_xlstm')

from hybrid_xmamba.models.configuration_hybrid import HybridConfig
from hybrid_xmamba.models.hybrid_lm import HybridLanguageModel

print('Loading 70M model...')
config = HybridConfig(
    dim=512,
    num_layers=8,
    vocab_size=50257,
    state_size=16,
    conv_size=4,
    expand_factor=2,
    use_fast_path=True,
    head_dim=64,
    num_heads=8,
    use_tfla=True,
    proj_factor=2,
    slstm_hidden_dim=512,
    slstm_num_heads=4,
    use_exponential_gate=True,
)

model = HybridLanguageModel(config).eval().cuda()
tokenizer = AutoTokenizer.from_pretrained('gpt2')

print('Testing inference...')
text = 'The quick brown fox jumps over the lazy dog'
inputs = tokenizer(text, return_tensors='pt')

with torch.no_grad():
    # Pass only input_ids to the model
    outputs = model(input_ids=inputs['input_ids'].cuda())

print(f'‚úì Model loaded and inference works!')
print(f'Output shape: {outputs.logits.shape}')
print(f'Model parameters: {sum(p.numel() for p in model.parameters()) / 1e6:.1f}M')
print('‚úÖ Quick test completed successfully!')
"""

!python -c "$python_script"

### Reference: training the full 350M model (too large for a Colab T4)

In [None]:
# Reference only: every command in this cell is commented out, so running the
# cell as-is does nothing.
#
# Full training (for local machine with GPU, not recommended for Colab free tier)
# Uncomment and run this on a machine with >= 24GB GPU VRAM
# Note: The 350M model is too large for Colab T4 (15GB).
# For Colab training, consider using a smaller model or local machine.
#
# NOTE(review): the command below selects trainer=colab_single_gpu and writes
# checkpoints to a Google Drive path, while the last hint in this cell uses
# trainer=single_gpu. Adjust both when running outside Colab - TODO confirm
# which trainer config is intended for local machines.

# !python scripts/train.py \
#     model=hybrid_350m \
#     dataset=wikitext \
#     trainer=colab_single_gpu \
#     trainer.max_epochs=3 \
#     trainer.default_root_dir=/content/drive/MyDrive/hybrid_mamba_checkpoints \
#     dataset.batch_size=4 \
#     dataset.eval_batch_size=4 \
#     dataset.num_workers=0 \
#     +dataset.max_seq_length=128 \
#     wandb.enabled=false

# print("📝 For full training on Colab, use a smaller model or local GPU")
# print("✅ Step 3 (inference test) completed successfully!")
# print("🚀 To train locally: python scripts/train.py model=hybrid_350m dataset=wikitext trainer=single_gpu")

## Step 4: Full Training (Optional)

In [None]:
# Option 1: Train 70M Hybrid Model (Recommended for Colab T4)
# The 70M model fits in Colab's 15GB GPU memory
# Training will take ~2-3 hours per epoch on T4

print("="*80)
print("üöÄ TRAINING HYBRID 70M MODEL ON COLAB")
print("="*80)
print("\nModel: hybrid_70m (70M parameters)")
print("GPU: Colab T4 (15GB VRAM)")
print("Estimated time per epoch: 2-3 hours")
print("Dataset: WikiText-103 (1.8M training examples)")
print("\nRunning training... This may take a while.")
print("="*80 + "\n")

# Train the 70M hybrid model with optimized Colab settings
!python scripts/train.py \
    model=hybrid_70m \
    dataset=wikitext \
    trainer=colab_single_gpu \
    trainer.max_epochs=1 \
    trainer.num_sanity_val_steps=0 \
    dataset.batch_size=4 \
    dataset.eval_batch_size=4 \
    dataset.num_workers=2 \
    dataset.preprocessing_num_workers=2 \
    +dataset.max_seq_length=256 \
    trainer.accumulate_grad_batches=2 \
    trainer.val_check_interval=0.5 \
    trainer.log_every_n_steps=20 \
    wandb.enabled=false \
    trainer.enable_checkpointing=true \
    trainer.default_root_dir=/content/outputs

print("\n" + "="*80)
print("‚úÖ Training completed!")
print("="*80)

# Optional: Copy results to Google Drive for persistence
!mkdir -p /content/drive/MyDrive/hybrid_mamba_results 2>/dev/null
!cp -r /content/outputs /content/drive/MyDrive/hybrid_mamba_results/ 2>/dev/null && echo "Results saved to Google Drive" || echo "Could not save to Drive (not mounted)"


In [None]:
# Option 2: Train 150M Hybrid Model (More compute-intensive, may need gradient checkpointing)
# Only use this if you want to train a larger model
# May result in OOM on T4 - use gradient checkpointing or reduce batch size if needed

print("="*80)
print("üöÄ TRAINING HYBRID 150M MODEL ON COLAB")
print("="*80)
print("\nModel: hybrid_150m (150M parameters)")
print("GPU: Colab T4 (15GB VRAM)")
print("Estimated time per epoch: 3-4 hours (slower due to size)")
print("Dataset: WikiText-103")
print("\nNote: This is more resource-intensive. If you get OOM errors:")
print("  - Reduce batch_size to 2")
print("  - Reduce +dataset.max_seq_length to 128")
print("  - Increase trainer.accumulate_grad_batches to 4")
print("="*80 + "\n")

# Train the 150M hybrid model
!python scripts/train.py \
    model=hybrid_150m \
    dataset=wikitext \
    trainer=colab_single_gpu \
    trainer.max_epochs=1 \
    trainer.num_sanity_val_steps=0 \
    dataset.batch_size=2 \
    dataset.eval_batch_size=2 \
    dataset.num_workers=2 \
    dataset.preprocessing_num_workers=2 \
    +dataset.max_seq_length=256 \
    trainer.accumulate_grad_batches=2 \
    trainer.val_check_interval=0.5 \
    trainer.log_every_n_steps=20 \
    wandb.enabled=false \
    trainer.enable_checkpointing=true \
    trainer.default_root_dir=/content/outputs

print("\n" + "="*80)
print("‚úÖ Training completed!")
print("="*80)
