<a href="https://colab.research.google.com/github/pushkar-hue/Chad-Bot/blob/main/deployment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q "fastapi[all]" uvicorn pyngrok torch transformers bitsandbytes peft accelerate requests

import os
import torch
import uvicorn
import nest_asyncio
from pyngrok import ngrok
from fastapi import FastAPI
from fastapi.responses import FileResponse
from pydantic import BaseModel
import requests
import json
import threading
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
from peft import PeftModel

# Patch asyncio so an event loop can be started while the notebook's own loop
# is already running (presumably needed for serving from inside Colab/Jupyter).
nest_asyncio.apply()

# --- PASTE YOUR KEYS HERE ---
# Each key is read from the environment variable of the same name when it is
# set, so credentials do not have to be saved inside the notebook. To hard-code
# a key instead, replace the "" fallback below with the key.
SERPER_API_KEY = os.environ.get("SERPER_API_KEY", "")      # sent as X-API-KEY to Serper
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "")  # passed to ngrok.set_auth_token
# ----------------------------


# Load the quantized base model, attach the fine-tuned PEFT adapter, and wrap
# the result in a text-generation pipeline. The module-level names `tokenizer`
# and `generator` created here are used by the /chat and /search endpoints.
print("\n--- Loading your fine-tuned model... ---")
base_model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # base checkpoint id
adapter_id = "notninja/chad-gpt"  # PEFT adapter applied on top of the base model

# bitsandbytes settings: load weights in 4-bit using the "nf4" quantization
# type, with bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# device_map="auto" leaves device placement of the model to the library.
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")
# The tokenizer comes from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
# Wrap the base model with the adapter weights identified by `adapter_id`.
model = PeftModel.from_pretrained(base_model, adapter_id)
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
print("✅ Model is loaded and ready!")

# The ASGI application; routes are registered on it by the decorators below.
app = FastAPI()

class ChatRequest(BaseModel):
    """JSON request body accepted by the ``/chat`` and ``/search`` endpoints."""

    # The user's text: the chat message for /chat, the search query for /search.
    message: str

@app.get("/", response_class=FileResponse)
async def read_index():
    """Serve the front-end page at the site root.

    Returns:
        A ``FileResponse`` for ``index.html``. The path is relative, so the
        file is looked up from the process's current working directory.
    """
    index_page = 'index.html'
    return FileResponse(index_page)

@app.post("/chat")
async def chat_endpoint(request: ChatRequest):
    """Generate a single-turn reply to the user's message.

    Builds a system + user conversation, renders it with the tokenizer's chat
    template, runs the module-level ``generator`` pipeline, and returns only
    the assistant's part of the generated text.

    Args:
        request: Request body whose ``message`` is the user's chat text.

    Returns:
        A dict of the form ``{"response": <assistant reply>}``.
    """
    assistant_header = "<|start_header_id|>assistant<|end_header_id|>\n\n"
    stop_marker = "</s>"

    conversation = [
        {"role": "system", "content": "You are a 'Chad' chatbot that speaks in Gen-Z slang."},
        {"role": "user", "content": request.message},
    ]
    prompt = tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True,
    )
    outputs = generator(
        prompt,
        max_new_tokens=150,
        temperature=0.7,
        eos_token_id=tokenizer.eos_token_id,
    )

    # Keep only what follows the last assistant header in the generated text.
    reply = outputs[0]['generated_text'].rpartition(assistant_header)[2].strip()

    # Drop a trailing "</s>" marker if one is left in the text.
    if reply.endswith(stop_marker):
        reply = reply[:-len(stop_marker)].strip()

    return {"response": reply}

def _fetch_search_context(query, timeout=10):
    """Return up to five Serper result snippets for ``query``, newline-joined.

    Args:
        query: Search string sent to the Serper API.
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled search cannot hang the endpoint indefinitely.

    Returns:
        The joined, non-empty snippets of the first five organic results;
        ``"Couldn't find anything."`` when there are none; or
        ``"Web search is down bad rn."`` when the request or parsing fails.
    """
    try:
        response = requests.post(
            "https://google.serper.dev/search",
            headers={'X-API-KEY': SERPER_API_KEY, 'Content-Type': 'application/json'},
            json={"q": query},
            timeout=timeout,
        )
        # Treat HTTP errors (e.g. a rejected API key) as a search failure
        # instead of reporting them as "no results".
        response.raise_for_status()
        top_results = response.json().get('organic', [])[:5]
        # Skip results without a snippet so blank lines alone cannot make the
        # context look non-empty.
        snippets = [hit.get('snippet', '') for hit in top_results]
        context = "\n".join(snippet for snippet in snippets if snippet)
    except Exception as exc:
        # Best-effort boundary: search must never crash the endpoint, but the
        # cause is logged so failures are diagnosable.
        print(f"⚠️ Web search failed: {exc!r}")
        return "Web search is down bad rn."
    return context or "Couldn't find anything."


@app.post("/search")
async def search_endpoint(request: ChatRequest):
    """Answer the user's question using web search results as context.

    Looks the message up via the Serper search API, embeds at most 2000
    characters of the resulting snippets in the user prompt, and generates a
    reply with the module-level ``generator`` pipeline.

    Args:
        request: Request body whose ``message`` is both the search query and
            the question to answer.

    Returns:
        A dict of the form ``{"response": <assistant reply>}``. Search failures
        do not raise; a fallback sentence is used as the context instead.
    """
    # NOTE: `requests` is synchronous, so this call blocks inside the
    # `async def` handler until the search returns or times out.
    context = _fetch_search_context(request.message)

    system_prompt = "You are a 'Chad' chatbot that speaks in Gen-Z slang."
    user_instruction = f"Based on these web search results: --- {context[:2000]} --- Answer my original question: '{request.message}'. Keep it short and confident."
    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_instruction}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    result = generator(prompt, max_new_tokens=250, temperature=0.7, eos_token_id=tokenizer.eos_token_id)
    full_text = result[0]['generated_text']
    # Keep only what follows the last assistant header in the generated text.
    response_only = full_text.split("<|start_header_id|>assistant<|end_header_id|>\n\n")[-1].strip()
    # Drop a trailing "</s>" marker if one is left in the text.
    if response_only.endswith("</s>"):
        response_only = response_only[:-len("</s>")].strip()
    return {"response": response_only}

# --- Launch the app with ngrok ---
print("\n--- Launching app with ngrok... ---")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Run uvicorn on a daemon thread so this cell does not block on the server.
threading.Thread(
    target=uvicorn.run,
    args=(app,),
    kwargs=dict(host="0.0.0.0", port=8000),
    daemon=True,
).start()

# Open an ngrok tunnel to the local port the server listens on.
public_url = ngrok.connect(8000)
print("✅ Your FastAPI app is live at:", public_url)