<a href="https://colab.research.google.com/github/pushkar-hue/Chad-Bot/blob/main/gradio_ui.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q gradio

In [None]:
!pip install -q accelerate bitsandbytes fsspec==2025.3.2 datasets peft transformers trl

In [None]:
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel

In [None]:
# Hub repository of the base checkpoint that is loaded (quantized) below.
base_model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
# Hub repository of the PEFT adapter that is attached to the base model.
adapter_id = "notninja/chad-gpt"

In [None]:
# 4-bit NF4 quantization with double quantization enabled; compute runs in
# bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Load the base model with quantization.
# trust_remote_code is deliberately NOT enabled: the Llama architecture ships
# with transformers, so there is no reason to allow arbitrary code from the
# Hub repository to execute in this kernel.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
)


In [None]:
# Load the tokenizer that matches the base checkpoint. trust_remote_code is
# deliberately not enabled: nothing here needs code from the Hub repository
# to be executed locally.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
# Reuse the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

# Attach the fine-tuned PEFT adapter to the quantized base model.
model = PeftModel.from_pretrained(base_model, adapter_id)

In [None]:
# Text-generation pipeline wrapping the adapter-augmented model and its tokenizer.
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)


In [None]:
def _history_to_messages(history):
    """Convert a chat history into a list of ``{"role", "content"}`` dicts.

    Two history shapes are understood (presumably the two formats Gradio can
    hand to a ChatInterface callback -- verify against the installed version):

    * dicts carrying ``"role"`` and ``"content"`` keys, and
    * two-item ``[user_text, assistant_text]`` sequences.

    Only ``user``/``assistant`` turns whose content is a plain string are
    kept; anything else (``None``, file payloads, unknown shapes) is skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role, content = turn.get("role"), turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str):
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for role, content in zip(("user", "assistant"), turn):
                if isinstance(content, str):
                    messages.append({"role": role, "content": content})
    return messages


def get_response(message, history):
    """Generate the bot's reply for a Gradio ChatInterface.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation (may be empty or None).
            They are replayed into the prompt so the model sees the context.

    Returns:
        The newly generated text, stripped of surrounding whitespace.
    """
    # Build the prompt with the tokenizer's own chat template instead of a
    # hand-written "[INST]" wrapper, which is not the template of the
    # Llama-3 tokenizer loaded in this notebook.
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Stop on the tokenizer's EOS and, when the vocabulary has it, on the
    # end-of-turn token as well, so generation ends with the assistant turn.
    stop_ids = [tokenizer.eos_token_id]
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    if isinstance(eot_id, int) and eot_id not in stop_ids:
        stop_ids.append(eot_id)

    result = generator(
        prompt,
        max_new_tokens=150,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=stop_ids,
        do_sample=True,
        # Ask the pipeline for the completion only; splitting the full text on
        # a marker breaks as soon as the marker appears in the conversation.
        return_full_text=False,
    )

    return result[0]['generated_text'].strip()

In [None]:

# Wrap get_response in a Gradio chat UI and launch it with share=True.
gr.ChatInterface(
    fn=get_response,
    theme="soft",
    title="Chad-Bot 🤖",
    description="Ask me anything, fam. I've been fine-tuned on the latest Gen-Z slang. It's giving... intelligence. ✨",
    # Starter prompts offered to the user in the UI.
    examples=[
        "What does 'cap' mean?",
        "How do I become more confident?",
        "What's the vibe today?",
        "Explain blockchain like I'm 5.",
    ],
).launch(share=True)

In [None]:
def chat_with_bot(user_prompt):
    """
    Generates a response from the fine-tuned model for a given prompt.

    Args:
        user_prompt: The single user message to answer.

    Returns:
        The newly generated text, stripped of surrounding whitespace.
    """
    # Build the prompt with the tokenizer's own chat template instead of a
    # hand-written "[INST]" wrapper, which is not the template of the
    # Llama-3 tokenizer loaded in this notebook.
    messages = [{"role": "user", "content": user_prompt}]
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    # Stop on the tokenizer's EOS and, when the vocabulary has it, on the
    # end-of-turn token as well, so generation ends with the assistant turn.
    stop_ids = [tokenizer.eos_token_id]
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    if isinstance(eot_id, int) and eot_id not in stop_ids:
        stop_ids.append(eot_id)

    # Generate the response using the pipeline
    result = generator(
        prompt,
        max_new_tokens=150,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=stop_ids,
        do_sample=True,
        # Ask the pipeline for the completion only; splitting the full text on
        # a marker breaks as soon as the marker appears in the conversation.
        return_full_text=False,
    )

    return result[0]['generated_text'].strip()


In [None]:
# Smoke test: ask the bot a single question and show its reply as the cell output.
chat_with_bot(user_prompt="How to get rizz ")

In [None]:
!pip install -q gradio

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel

print("Setting up the model... This may take a few minutes.")

# Hub repositories: the base checkpoint and the PEFT adapter applied on top.
base_model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
adapter_id = "notninja/chad-gpt"

# 4-bit NF4 quantization (bfloat16 compute) to save memory.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Quantized base model.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    quantization_config=bnb_config,
)

# Tokenizer for the base checkpoint; the EOS token doubles as the pad token.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.pad_token = tokenizer.eos_token

# Fine-tuned adapter from the Hub, attached to the base model.
model = PeftModel.from_pretrained(base_model, adapter_id)

print("✅ Model setup complete!")

# Text-generation pipeline used by the chat functions.
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)




def _history_to_messages(history):
    """Convert a chat history into a list of ``{"role", "content"}`` dicts.

    Two history shapes are understood (presumably the two formats Gradio can
    hand to a ChatInterface callback -- verify against the installed version):

    * dicts carrying ``"role"`` and ``"content"`` keys, and
    * two-item ``[user_text, assistant_text]`` sequences.

    Only ``user``/``assistant`` turns whose content is a plain string are
    kept; anything else (``None``, file payloads, unknown shapes) is skipped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role, content = turn.get("role"), turn.get("content")
            if role in ("user", "assistant") and isinstance(content, str):
                messages.append({"role": role, "content": content})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for role, content in zip(("user", "assistant"), turn):
                if isinstance(content, str):
                    messages.append({"role": role, "content": content})
    return messages


def get_response(message, history):
    """Generate the persona-driven reply for a Gradio ChatInterface.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation (may be empty or None).
            They are replayed into the prompt so the model sees the context.

    Returns:
        The newly generated text, stripped of surrounding whitespace.
    """
    system_prompt = "You are a 'Chad' chatbot that speaks in Gen-Z slang and gives advice from that perspective."

    # System prompt first, then the earlier turns, then the new user message.
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Stop on the tokenizer's EOS and, when the vocabulary has it, on the
    # end-of-turn token as well, so generation ends with the assistant turn.
    stop_ids = [tokenizer.eos_token_id]
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    if isinstance(eot_id, int) and eot_id not in stop_ids:
        stop_ids.append(eot_id)

    result = generator(
        prompt,
        max_new_tokens=150,
        temperature=0.7,
        # Sampling is requested explicitly so the temperature above is
        # honoured regardless of the model's default generation settings.
        do_sample=True,
        eos_token_id=stop_ids,
        # Ask the pipeline for the completion only; splitting the full text on
        # the assistant header breaks if that marker shows up in the chat.
        return_full_text=False,
    )

    return result[0]['generated_text'].strip()

print("Launching Gradio UI with the updated function...")

# Wrap get_response in a Gradio chat UI and launch it with share=True.
gr.ChatInterface(
    fn=get_response,
    theme="soft",
    title="Gen-Z Chad-Bot 🤖",
    description="Ask me anything, fam. I've been fine-tuned on the latest Gen-Z slang. It's giving... intelligence. ✨",
    # Starter prompts offered to the user in the UI.
    examples=[
        "How do I get girls?",
        "What does 'cap' mean?",
        "What's the vibe today?",
    ],
).launch(share=True)

In [None]:
def chat_with_bot(user_prompt):
    """
    Generates a response from the fine-tuned model using a system prompt
    to enforce the desired persona.

    Args:
        user_prompt: The single user message to answer.

    Returns:
        The newly generated text, stripped of surrounding whitespace.
    """
    system_prompt = "You are a 'Chad' chatbot that speaks in Gen-Z slang and gives advice from that perspective."

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Stop on the tokenizer's EOS and, when the vocabulary has it, on the
    # end-of-turn token as well, so generation ends with the assistant turn.
    stop_ids = [tokenizer.eos_token_id]
    eot_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
    if isinstance(eot_id, int) and eot_id not in stop_ids:
        stop_ids.append(eot_id)

    # Generate the response
    result = generator(
        prompt,
        max_new_tokens=150,
        temperature=0.7,
        # Sampling is requested explicitly so the temperature above is
        # honoured regardless of the model's default generation settings.
        do_sample=True,
        eos_token_id=stop_ids,
        # Ask the pipeline for the completion only; splitting the full text on
        # the assistant header breaks if that marker shows up in the chat.
        return_full_text=False,
    )

    return result[0]['generated_text'].strip()

In [None]:
# Smoke test of the system-prompt version: show the bot's reply as the cell output.
chat_with_bot(user_prompt="how do I get goth baddies")