In [None]:
!pip install -q kagglehub mistral-common bitsandbytes transformers --no-cache-dir
!pip install -q accelerate torch --no-cache-dir

import shutil
import os
import gc

In [None]:
def cleanup_cache(cache_dirs=None):
    """Delete cache directories to reclaim disk space, then run the garbage collector.

    Args:
        cache_dirs: Optional directory path, or iterable of directory paths, to
            remove. When omitted, '/root/.cache' and '/tmp/kagglehub' are removed.

    Removal is best-effort: paths that do not exist are skipped and errors
    raised while deleting are ignored (``ignore_errors=True``).
    """
    if cache_dirs is None:
        cache_dirs = ['/root/.cache', '/tmp/kagglehub']
    elif isinstance(cache_dirs, (str, bytes, os.PathLike)):
        # A lone path must not be iterated character by character: that would
        # visit '/' for any absolute path.
        cache_dirs = [cache_dirs]

    for cache_dir in cache_dirs:
        if os.path.exists(cache_dir):
            shutil.rmtree(cache_dir, ignore_errors=True)
    gc.collect()

# Run the cleanup once now, before the model is downloaded in a later cell.
cleanup_cache()
print("🧹 Disk space optimized!")

In [None]:
import warnings
warnings.filterwarnings("ignore")

import torch
import kagglehub
from mistral_common.protocol.instruct.messages import UserMessage
from mistral_common.protocol.instruct.request import ChatCompletionRequest
from mistral_common.tokens.tokenizers.mistral import MistralTokenizer
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

In [None]:
class LightweightDevstral:
    """Devstral wrapper that loads the checkpoint in 4-bit NF4 and generates text.

    Attributes:
        model_path: Local directory returned by ``kagglehub.model_download``.
        model: The causal LM loaded with a ``BitsAndBytesConfig`` (4-bit).
        tokenizer: ``MistralTokenizer`` built from ``tekken.json`` in ``model_path``.
    """

    def __init__(self):
        """Download the checkpoint, load it quantized, and build the tokenizer."""
        print("📦 Downloading model (streaming mode)...")

        self.model_path = kagglehub.model_download(
            'mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1',
            force_download=False
        )

        quantization_config = BitsAndBytesConfig(
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_storage=torch.uint8,
            load_in_4bit=True
        )

        print("⚡ Loading ultra-compressed model...")
        # NOTE(review): trust_remote_code=True permits running Python code
        # shipped with the checkpoint; confirm it is actually required here.
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_path,
            torch_dtype=torch.float16,
            device_map="auto",
            quantization_config=quantization_config,
            low_cpu_mem_usage=True,
            trust_remote_code=True
        )

        self.tokenizer = MistralTokenizer.from_file(f'{self.model_path}/tekken.json')

        # Model and tokenizer are already in memory, so the on-disk caches are
        # dropped. NOTE(review): presumably this also removes the downloaded
        # weights (kagglehub's cache location should be confirmed), meaning a
        # new instance would download again.
        cleanup_cache()
        print("✅ Lightweight assistant ready! (~2GB disk usage)")

    def _pad_token_id(self):
        """Return the EOS token id to use as the padding id, or None if unknown.

        The original attribute ``tokenizer.eos_token_id`` is honoured when it
        exists. Otherwise the id is looked up on the wrapped tokenizer
        (``tokenizer.instruct_tokenizer.tokenizer.eos_id``) and finally on the
        model config.
        NOTE(review): mistral_common's MistralTokenizer does not appear to
        expose ``eos_token_id`` itself; confirm against the installed version.
        """
        direct = getattr(self.tokenizer, "eos_token_id", None)
        if direct is not None:
            return direct

        instruct = getattr(self.tokenizer, "instruct_tokenizer", None)
        wrapped = getattr(instruct, "tokenizer", None)
        eos_id = getattr(wrapped, "eos_id", None)
        if eos_id is not None:
            return eos_id

        config = getattr(self.model, "config", None)
        return getattr(config, "eos_token_id", None)

    def generate(self, prompt, max_tokens=400, temperature=0.6, top_p=0.85):
        """Generate a completion for a single user prompt.

        Args:
            prompt: Text sent as the content of one ``UserMessage``.
            max_tokens: Passed to the model as ``max_new_tokens``.
            temperature: Sampling temperature (sampling is always enabled).
            top_p: Nucleus-sampling threshold.

        Returns:
            The decoded text of the newly generated tokens only; the prompt
            tokens are sliced off before decoding.
        """
        request = ChatCompletionRequest(messages=[UserMessage(content=prompt)])
        prompt_tokens = self.tokenizer.encode_chat_completion(request).tokens

        # Place the ids on whatever device the model reports.
        input_ids = torch.tensor([prompt_tokens]).to(self.model.device)

        with torch.inference_mode():
            output = self.model.generate(
                input_ids=input_ids,
                max_new_tokens=max_tokens,
                temperature=temperature,
                top_p=top_p,
                do_sample=True,
                pad_token_id=self._pad_token_id(),
                use_cache=True
            )[0]

        del input_ids
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

        # Hand the tokenizer plain Python ints rather than a tensor slice.
        new_tokens = output[len(prompt_tokens):].tolist()
        return self.tokenizer.decode(new_tokens)

print("🚀 Initializing lightweight AI assistant...")
# Module-level instance; run_demo() and quick_coding() call assistant.generate().
assistant = LightweightDevstral()

In [None]:
def run_demo(title, prompt, emoji="🎯"):
    """Print a titled banner, generate a reply for ``prompt``, and free memory.

    The reply comes from the module-level ``assistant`` with a 350-token cap.
    Afterwards the garbage collector runs and, when CUDA is available, the
    CUDA cache is emptied.
    """
    banner = f"\n{emoji} {title}"
    rule = "-" * 50
    print(banner, rule, sep="\n")

    print(assistant.generate(prompt, max_tokens=350))

    gc.collect()
    cuda_present = torch.cuda.is_available()
    if cuda_present:
        torch.cuda.empty_cache()

# Demo 1: write a function from a one-line specification.
run_demo(
    "Quick Prime Finder",
    "Write a fast prime checker function `is_prime(n)` with explanation and test cases.",
    "🔢"
)

# Demo 2: debugging; the snippet in the prompt divides by len() of the
# filtered list, which is zero when no number is positive.
run_demo(
    "Debug This Code",
    """Fix this buggy function and explain the issues:
```python
def avg_positive(numbers):
    total = sum([n for n in numbers if n > 0])
    return total / len([n for n in numbers if n > 0])
```""",
    "🐛"
)

# Demo 3: generate a small class with several methods.
run_demo(
    "Text Tool Creator",
    "Create a simple `TextAnalyzer` class with word count, char count, and palindrome check methods.",
    "🛠️"
)

In [None]:
def quick_coding(max_sessions=5):
    """Run a short interactive prompt loop against the module-level ``assistant``.

    Args:
        max_sessions: Maximum number of prompts to read before stopping.

    The loop ends early when the user enters 'exit', 'quit', or a blank line
    (case and surrounding whitespace are ignored), or when input reaches EOF.
    Each reply is requested with ``max_tokens=300`` and only its first 500
    characters are printed. A failure while generating is reported (first 100
    characters of the message) and still counts as a used session.
    """
    print("\n🎮 QUICK CODING MODE")
    print("=" * 40)
    print("Enter short coding prompts (type 'exit' to quit)")

    for session_number in range(1, max_sessions + 1):
        try:
            prompt = input(f"\n[{session_number}/{max_sessions}] Your prompt: ").strip()
        except EOFError:
            # No interactive input is available; end the session quietly.
            break

        if prompt.lower() in ('exit', 'quit', ''):
            break

        try:
            result = assistant.generate(prompt, max_tokens=300)
            print("💡 Solution:")
            print(result[:500])
        except Exception as e:
            print(f"❌ Error: {str(e)[:100]}...")
        finally:
            # Release memory after a failed generation as well as a good one.
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    print("\n✅ Session complete! Memory cleaned.")

In [None]:
def check_disk_usage():
    """Print used and available space for '/' as reported by ``df -h /``.

    The third and fourth whitespace-separated fields of the second output line
    are printed as "used" and "available". If ``df`` cannot be run or its
    output does not have that shape, a fallback message is printed instead.
    Only those expected failures are handled; interrupts propagate.
    """
    import subprocess

    try:
        result = subprocess.run(['df', '-h', '/'], capture_output=True, text=True)
        rows = result.stdout.splitlines()
        fields = rows[1].split()
        used, available = fields[2], fields[3]
    except (OSError, subprocess.SubprocessError, IndexError):
        print("💾 Disk usage check unavailable")
        return

    print(f"💾 Disk: {used} used, {available} available")


# Final cell: clear the caches once more and report disk usage.
print("\n🎉 Tutorial Complete!")
cleanup_cache()
check_disk_usage()

# NOTE(review): the sizes quoted in the tips below are hard-coded strings,
# not values measured by this notebook.
print("\n💡 Space-Saving Tips:")
print("• Model uses ~2GB vs original ~7GB+")
print("• Automatic cache cleanup after each use")
print("• Limited token generation to save memory")
print("• Use 'del assistant' when done to free ~2GB")
print("• Restart runtime if memory issues persist")

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.0/67.0 MB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m43.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m60.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m100.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m78.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m88.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Downloading 21 files:   0%|          | 0/21 [00:00<?, ?it/s]

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/generation_config.json...





Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/assets/tuto_open_hands/agent_prompting.png...


100%|██████████| 111/111 [00:00<00:00, 229kB/s]


Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/README.md...



  0%|          | 0.00/385k [00:00<?, ?B/s][A

  0%|          | 0.00/17.2k [00:00<?, ?B/s]

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/consolidated.safetensors...


100%|██████████| 17.2k/17.2k [00:00<00:00, 17.2MB/s]



100%|██████████| 385k/385k [00:00<00:00, 10.6MB/s]

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/assets/tuto_open_hands/app_ui.png...





Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/assets/swe_bench.png...
Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/config.json...





Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/assets/tuto_open_hands/agent_working.png...


  0%|          | 0.00/96.3k [00:00<?, ?B/s][A

100%|██████████| 96.3k/96.3k [00:00<00:00, 7.18MB/s]




  0%|          | 0.00/800k [00:00<?, ?B/s][A[A[A[A




100%|██████████| 110k/110k [00:00<00:00, 2.55MB/s]
100%|██████████| 620/620 [00:00<00:00, 58.0kB/s]



100%|██████████| 800k/800k [00:00<00:00, 14.4MB/s]



  0%|          | 8.00M/43.9G [00:00<19:09, 41.0MB/s][A[A[A

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/model-00001-of-00010.safetensors...



  0%|          | 0.00/4.45G [00:00<?, ?B/s][A


  0%|          | 16.0M/43.9G [00:00<14:10, 55.4MB/s][A[A[A

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/model-00003-of-00010.safetensors...




  0%|          | 0.00/4.45G [00:00<?, ?B/s][A[A

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/model-00002-of-00010.safetensors...






  0%|          | 0.00/4.45G [00:00<?, ?B/s]

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/model-00006-of-00010.safetensors...


[A[A[A[A

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/model-00004-of-00010.safetensors...
Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/model-00005-of-00010.safetensors...








  0%|          | 0.00/4.55G [00:00<?, ?B/s][A[A[A[A[A[A




  0%|          | 0.00/4.45G [00:00<?, ?B/s][A[A[A[A[A
  0%|          | 1.00M/4.45G [00:00<10:31, 7.57MB/s][A






  0%|          | 0.00/4.45G [00:00<?, ?B/s][A[A[A[A[A[A[A

Downloading from https://www.kaggle.com/api/v1/models/mistral-ai/devstral-small-2505/Transformers/devstral-small-2505/1/download/model-00007-of-00010.safetensors...


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
 53%|█████▎    | 2.37G/4.45G [05:32<03:00, 12.4MB/s][A[A[A[A[A[A[A


  5%|▌         | 2.36G/43.9G [05:33<1:01:36, 12.1MB/s][A[A[A




 53%|█████▎    | 2.36G/4.45G [05:32<03:08, 11.9MB/s][A[A[A[A[A

 53%|█████▎    | 2.38G/4.45G [05:32<03:20, 11.1MB/s][A[A
 53%|█████▎    | 2.36G/4.45G [05:33<02:59, 12.6MB/s][A



 54%|█████▍    | 2.41G/4.45G [05:32<02:47, 13.1MB/s][A[A[A[A







 52%|█████▏    | 2.36G/4.55G [05:32<03:17, 11.9MB/s][A[A[A[A[A[A[A[A





 52%|█████▏    | 2.39G/4.55G [05:32<02:39, 14.5MB/s][A[A[A[A[A[A


  5%|▌         | 2.36G/43.9G [05:33<1:01:07, 12.2MB/s][A[A[A






 53%|█████▎    | 2.38G/4.45G [05:32<03:15, 11.4MB/s][A[A[A[A[A[A[A





 53%|█████▎    | 2.39G/4.55G [05:33<02:39, 14.6MB/s][A[A[A[A[A[A

 53%|█████▎    | 2.38G/4.45G [05:33<03:31, 10.5MB/s][A[A



 54%|█████▍    | 2.41G/4.45G [05:33<02:54, 12.6MB/s][A[A[A[A
 53%|█████▎    | 2.36G/4.45G 