# 🚀 Xoron Multimodal Model - Kaggle Setup

**Model:** `Backup-bdg/Xoron-Dev-MultiMoe`

This notebook sets up and runs the Xoron multimodal model for:
- Text generation
- Image generation (snowy mountain)
- Video generation (windy mountain)


## Step 1: Install Dependencies


In [None]:
# Install PyTorch and dependencies
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install -q transformers safetensors huggingface_hub accelerate
!pip install -q Pillow opencv-python

print("✅ Dependencies installed!")

## Step 2: Clone Repository & Set Up Path


In [None]:
import os
import sys

# Clone xformer repo
XFORMER_DIR = "./xformer"
if not os.path.exists(XFORMER_DIR):
    print("📥 Cloning xformer repository...")
    !git clone https://github.com/nigfuapp-web/xformer.git {XFORMER_DIR}

# Add kt-kernel to Python path (NO build required!)
kt_path = os.path.abspath(f"{XFORMER_DIR}/kt-kernel/python")
if kt_path not in sys.path:
    sys.path.insert(0, kt_path)

print(f"✅ Added to path: {kt_path}")

# Test import
from kt_kernel.models.xoron import XoronForCausalLM, XoronMultimodalProcessor
print("✅ Xoron model imported!")

## Step 3: Download Model from HuggingFace


In [None]:
from huggingface_hub import snapshot_download

# Hub repository to fetch and the local folder it is mirrored into.
MODEL_REPO = "Backup-bdg/Xoron-Dev-MultiMoe"
MODEL_DIR = "./xoron-model"

# Call snapshot_download unconditionally instead of guarding on the folder's
# existence: an interrupted earlier run leaves MODEL_DIR behind, and a plain
# existence check would then treat the partial download as complete.
# snapshot_download keeps metadata inside local_dir and skips files that are
# already up to date, so re-running this cell only fetches what is missing.
print(f"📥 Downloading {MODEL_REPO}...")
snapshot_download(repo_id=MODEL_REPO, local_dir=MODEL_DIR)
print(f"✅ Model ready at {MODEL_DIR}")

## Step 4: Load Model


In [None]:
import torch

# Report whether CUDA is usable (and which GPU) before loading the weights.
cuda_available = torch.cuda.is_available()
print("🔄 Loading Xoron model...")
print(f"   CUDA: {cuda_available}")
if cuda_available:
    print(f"   GPU: {torch.cuda.get_device_name(0)}")

# Loading options are collected in one place; they are forwarded unchanged to
# XoronForCausalLM.from_pretrained.
# NOTE(review): how the custom class interprets `device_map="auto"` and
# `trust_remote_code` is not visible from this notebook — confirm.
load_kwargs = dict(
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
model = XoronForCausalLM.from_pretrained(MODEL_DIR, **load_kwargs)
processor = XoronMultimodalProcessor.from_pretrained(MODEL_DIR)

# The device of the model's first parameter is the one the generation helper
# moves its inputs to.
device = next(model.parameters()).device

print(f"✅ Model loaded on {device}")

## Step 5: Test Text Generation


In [None]:
def generate(prompt, max_tokens=256, temperature=0.7, do_sample=True):
    """Generate a completion for ``prompt`` with the loaded model.

    Relies on the notebook-level ``processor``, ``model`` and ``device``
    objects created in the model-loading cell.

    Args:
        prompt: Text handed to the processor as ``text=``.
        max_tokens: Forwarded to ``model.generate`` as ``max_new_tokens``.
        temperature: Sampling temperature forwarded to ``model.generate``.
        do_sample: Forwarded to ``model.generate``; the default keeps the
            previous behaviour of sampling at temperature 0.7.

    Returns:
        The first sequence returned by ``model.generate``, decoded by the
        processor with ``skip_special_tokens=True``.
    """
    encoded = processor(text=prompt, return_tensors="pt")
    # Anything exposing `.to` is moved to the model's device; other entries
    # are passed through unchanged.
    model_inputs = {
        key: value.to(device) if hasattr(value, "to") else value
        for key, value in encoded.items()
    }
    # No gradients are needed for generation.
    with torch.no_grad():
        out = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            do_sample=do_sample,
        )
    return processor.decode(out[0], skip_special_tokens=True)

# Smoke test: send one prompt through `generate` and show the decoded reply
# under a short banner.
divider = "-" * 40
print("💬 Text Generation Test")
print(divider)
response = generate("Hello! What are your capabilities?")
print(response)

## Step 6: Generate Snowy Mountain Image


In [None]:
# Banner for the image-generation step.
divider = "-" * 40
print("🏔️ Image Generation - Snowy Mountain")
print(divider)

# NOTE(review): the request goes through the text `generate` helper and this
# cell only prints the decoded string; nothing here saves or displays an
# image — confirm how the model is expected to return image output.
prompt = "Generate a beautiful picture of a majestic mountain peak covered with lots of pristine white snow, dramatic lighting, photorealistic"
response = generate(prompt, max_tokens=300)
print(response)

## Step 7: Generate Windy Mountain Video


In [None]:
# Banner for the video-generation step.
divider = "-" * 40
print("🌬️ Video Generation - Windy Mountain")
print(divider)

# NOTE(review): the request goes through the text `generate` helper and this
# cell only prints the decoded string; nothing here saves or displays a
# video — confirm how the model is expected to return video output.
prompt = "Generate a video of a mountain landscape with windy climate, trees swaying, clouds moving rapidly, dramatic weather"
response = generate(prompt, max_tokens=300)
print(response)

## Done! 🎉
