In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Tokenizer and weights both come from the same Hub repository.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# float16 halves the weight footprint. device_map="auto" lets the loader place
# weights on whatever devices are available; when the accelerator is too small
# it may offload layers to CPU or disk rather than putting everything on GPU.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
).eval()  # eval() returns the module itself, so chaining keeps `model` bound to it

print("DeepSeek-R1 loaded successfully!")


  from .autonotebook import tqdm as notebook_tqdm
Some parameters are on the meta device because they were offloaded to the cpu and disk.


DeepSeek-R1 loaded successfully!


In [5]:
# @title Chatbot with R1

# Relies on `torch`, `tokenizer`, and `model` from the model-loading cell above.

def chat_loop():
    """
    Run an interactive console chat with the already-loaded model.

    Each turn reads one line from the user, generates a sampled reply using the
    module-level ``tokenizer`` and ``model``, and prints only the newly
    generated text (the prompt is not echoed back).

    The loop ends when the user types 'exit' (case-insensitive, surrounding
    whitespace ignored) or when input is closed/interrupted (EOF or Ctrl-C).
    Blank input is skipped without calling the model.

    Returns:
        None
    """
    print("AI Chatbot is ready! Type 'exit' to end the conversation.\n")

    while True:
        # Get user input; treat a closed stdin or an interrupt as a request to quit
        # so the cell finishes cleanly instead of ending in a traceback.
        try:
            prompt = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            print("\nChatbot: Goodbye!")
            break

        # Exit condition
        if prompt.lower() == "exit":
            print("Chatbot: Goodbye!")
            break

        # Nothing to answer: do not spend a generation call on blank input.
        if not prompt:
            continue

        # Tokenize the input. Padding is not requested: there is only one
        # sequence, so there is nothing to pad it against.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,        # Prevents overflow beyond model's max token length
        ).to(model.device)         # Move input to the model's device (CPU/GPU)

        # Remember how long the prompt is so it can be cut off the output below.
        prompt_length = inputs["input_ids"].shape[-1]

        # Generate response without tracking gradients (more efficient)
        with torch.no_grad():
            outputs = model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=500,                     # Limit response length
                do_sample=True,                         # Required for temperature to take effect
                temperature=0.7,                        # Controls randomness (higher = more creative)
                pad_token_id=tokenizer.eos_token_id,    # Pad with EOS when a sequence ends early
            )

        # A causal LM returns prompt + continuation; decode only the continuation
        # so the reply does not repeat what the user just typed.
        generated_tokens = outputs[0][prompt_length:]
        response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
        print(f"Chatbot: {response}\n")

# Start the interactive chat loop; this call keeps the cell running until the user types 'exit'
chat_loop()


AI Chatbot is ready! Type 'exit' to end the conversation.



: 

: 

In [1]:
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load pre-trained model and tokenizer
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Fetch the tokenizer first, then the half-precision weights.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# device_map="auto" leaves device placement to the loader instead of pinning
# the model to one device; inputs are later moved to model.device to match.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    device_map="auto",
    torch_dtype=torch.float16,
).eval()  # eval() hands back the same module, now in evaluation mode

# Define chatbot response function
def chatbot_response(prompt):
    """
    Generate a single reply for ``prompt`` with the module-level model.

    Args:
        prompt: Text entered by the user. ``None``, empty, or whitespace-only
            input is answered with a short hint and never reaches the model.

    Returns:
        str: Only the newly generated text, with the prompt removed and
        special tokens stripped.
    """
    # Guard against an empty submit from the UI; there is nothing to answer.
    if prompt is None or not prompt.strip():
        return "Please enter a message."

    # Single sequence, so no padding is requested; truncation keeps the prompt
    # within the tokenizer's maximum length.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)

    # Remember how long the prompt is so it can be cut off the output below.
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=500,
            do_sample=True,  # without sampling enabled, temperature is ignored
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )

    # A causal LM returns prompt + continuation; decode only the continuation
    # so the reply does not repeat the user's own text.
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

# Wrap chatbot_response in a text-in / text-out Gradio UI and start serving it
interface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="AI Chatbot",
)
interface.launch()


* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


