<a href="https://colab.research.google.com/github/qsardor/GoogleColabProjects/blob/main/DeepSeek_R1_1_5B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the packages this cell imports or relies on: gradio, transformers,
# accelerate and bitsandbytes. `-q` keeps pip's output quiet.
# NOTE: the leading `!` is IPython shell-escape syntax, so this line only runs
# inside a notebook cell (e.g. Colab), not as a plain Python script.
!pip install -q gradio transformers accelerate bitsandbytes

from google.colab import userdata
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import gradio as gr
import time

# --- Configuration --- #
# Hugging Face Hub repository id of the checkpoint to serve.
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# --- Load Credentials --- #
# Treat the Hugging Face token as optional. `userdata.get` raises when the
# Colab secret does not exist or when the notebook has not been granted access
# to it; in either case fall back to anonymous access (token=None) instead of
# aborting the cell. NOTE(review): this assumes the checkpoint is publicly
# downloadable; if it is gated, the `from_pretrained` calls below will fail
# with an authentication error and the secret must be configured.
try:
    HF_TOKEN = userdata.get('HF_TOKEN')
except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
    HF_TOKEN = None
    print("ℹ️ HF_TOKEN secret not available; continuing without authentication.")

# --- Model Loading --- #
print("🚀 Loading model...")
# 4-bit NF4 quantization with float16 compute, handled by bitsandbytes.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_quant_type="nf4"
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",  # let the loader decide where to place the weights
    quantization_config=quant_config,
    token=HF_TOKEN
)

# --- Chat Function --- #
def _history_to_messages(message, history):
    """Convert Gradio chat history plus the new message into chat-template messages.

    Two history shapes are accepted:

    * a list of ``{"role": ..., "content": ...}`` dicts, and
    * a list of ``[user_text, assistant_text]`` pairs.

    Entries whose content is not a non-empty string (for example file
    attachments or a missing assistant reply) are skipped.

    Args:
        message: The text the user just submitted.
        history: Previous turns as passed by the chat UI; may be ``None`` or empty.

    Returns:
        A list of ``{"role", "content"}`` dicts ending with the new user message.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = turn.get("role")
            content = turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str) and content:
                # Copy only role/content so extra UI keys never reach the template.
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            user_text, assistant_text = turn
            if isinstance(user_text, str) and user_text:
                messages.append({"role": "user", "content": user_text})
            if isinstance(assistant_text, str) and assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})
    return messages


def respond(message, history):
    """Generate the assistant's reply to ``message``, using ``history`` as context.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        message: The text the user just submitted.
        history: Earlier turns of the conversation supplied by the chat UI.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        stripped), with special tokens removed.
    """
    # Include earlier turns so the model can follow a multi-turn conversation.
    conversation = _history_to_messages(message, history)
    formatted_input = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True
    )

    # The rendered template already contains the special tokens it needs;
    # add_special_tokens=False stops the tokenizer from adding them a second
    # time (e.g. a duplicated BOS token).
    inputs = tokenizer(
        formatted_input,
        return_tensors="pt",
        add_special_tokens=False
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        eos_token_id=tokenizer.eos_token_id
    )

    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs.input_ids.shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

# --- Gradio Interface --- #
def create_interface():
    """Build the Gradio chat UI that routes user messages to ``respond``.

    Returns:
        A ``gr.ChatInterface`` configured with the app title, description,
        the "soft" theme and three example prompts. It is not launched here.
    """
    sample_prompts = [
        "Hello!",
        "Explain quantum computing",
        "How to make pancakes?",
    ]
    chat_ui = gr.ChatInterface(
        fn=respond,
        title="DeepSeek-R1 Chatbot",
        description="Safe & helpful AI assistant",
        theme="soft",
        examples=sample_prompts,
    )
    return chat_ui

# --- Launch with Public Link --- #
print("🌐 Starting Gradio server...")
interface = create_interface()
# launch() returns (app, local_url, share_url). The public link is the third
# element; the second is only the local address.
_, local_url, public_url = interface.launch(share=True, server_port=7860)
if public_url:
    print(f"\n✅ Public URL: {public_url}\n")
else:
    # Share-link creation can fail; report the local address rather than
    # labelling it as public.
    print(f"\n⚠️ No public share link was created. Local URL: {local_url}\n")

# Keep the cell running
try:
    while True:
        time.sleep(3600)  # Keep alive in 1-hour intervals
except KeyboardInterrupt:
    # Interrupting the cell is the expected way to stop the server.
    print("\n🔴 Shutting down...")
    interface.close()