# LLaMA Chatbot
A basic chatbot interface, built with Gradio, around a small open-source causal language model from Hugging Face (`tiiuae/falcon-rw-1b`).

In [None]:
!pip install -q transformers accelerate gradio

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

In [None]:
# Small open checkpoint that can be fetched without an API key.
model_name = "tiiuae/falcon-rw-1b"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Prefer the GPU when torch can see one; otherwise keep the weights on the CPU.
model = AutoModelForCausalLM.from_pretrained(model_name)
if torch.cuda.is_available():
    model = model.to("cuda")
else:
    model = model.to("cpu")

In [None]:
def chat(prompt: str) -> str:
    """Generate text for *prompt* using the module-level tokenizer and model.

    Args:
        prompt: Text entered by the user.

    Returns:
        The first sequence returned by ``model.generate`` (at most 100 newly
        generated tokens), decoded with special tokens removed. An empty
        string is returned when the prompt is empty or only whitespace.
    """
    # Nothing to condition on: skip generation rather than handing the model
    # an empty token sequence.
    if not prompt or not prompt.strip():
        return ""

    # The tokenizer builds its tensors on the CPU; move them to the model's
    # device so generation does not fail with a device mismatch when the
    # model was placed on CUDA.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
# Build a one-textbox-in, one-textbox-out UI around chat(), then start serving it.
demo = gr.Interface(
    fn=chat,
    title="LLaMA Chatbot",
    description="Talk to a small open-source language model",
    inputs=gr.Textbox(label="Ask something"),
    outputs=gr.Textbox(label="Response"),
)
demo.launch()