In [None]:
%pip install -q transformers torch gradio

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
import torch
from kaggle_secrets import UserSecretsClient
from huggingface_hub import login

# Log in to the Hugging Face Hub using the token stored as a Kaggle secret
secret_label = "HF_TOKEN"  # Replace with your secret label
secret_value = UserSecretsClient().get_secret(secret_label)
login(secret_value)

# Run on the GPU when CUDA is usable, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the fine-tuned tokenizer and model from the attached dataset directory
model_path = "/kaggle/input/model-2/llama-3.2-3b-eade-finetuned"  # Adjust path as needed
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)
model = model.to(device)  # place the weights on the selected device

# Function to generate response
def chatbot_response(input_text):
    """Generate the model's reply to a single user message.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        input_text: The user's message. ``None``, an empty string, or a
            whitespace-only string produces an empty reply without calling
            the model.

    Returns:
        The sampled continuation decoded to text, with special tokens
        removed and without the prompt echoed back.
    """
    # Nothing to answer: skip tokenisation and generation entirely.
    if not input_text or not input_text.strip():
        return ""

    # Calling the tokenizer (rather than ``encode``) also yields the
    # attention mask, which ``generate`` needs to treat the prompt correctly.
    encoded = tokenizer(input_text, return_tensors="pt").to(device)
    input_ids = encoded["input_ids"]

    # ``max_new_tokens`` bounds only the generated part; ``max_length`` would
    # count the prompt too and leave long prompts with little or no reply.
    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=encoded["attention_mask"],
        max_new_tokens=100,
        do_sample=True,
    )

    # Causal LMs return prompt + continuation; keep only the continuation so
    # the user does not see their own message repeated in the answer.
    prompt_length = input_ids.shape[-1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    return response

# Build a text-in / text-out Gradio UI around the response function
interface = gr.Interface(
    fn=chatbot_response,
    inputs="text",
    outputs="text",
    title="Finetuned LLaMA Chatbot",
)

# Start serving the UI
interface.launch()