### **Basic info about the API**
->The API classifies text into the content moderation categories listed below:
<br>**'explicit_nudity', 'suggestive', 'violence', 'disturbing_content', 'rude_gestures', 'alcohol', 'drugs', 'tobacco', 'hate_speech', 'safe'**

->It is built using the **Meta Llama-3.1-8B-Instruct** model from Hugging Face.

->API framework: **FastAPI**

### **Requirements to run this code successfully**
->Change the Colab runtime type from `CPU` to `T4 GPU` so that the model can load successfully.

->The model will not load properly on CPU; the session will crash.

->This code will prompt for two tokens:
* Hugging Face token : **........**
* ngrok token : **..............**

Copy and paste each token into the corresponding prompt box.

### <font color='red'>**NOTE:**</font>

<font color='red'>**The ngrok public URL generated is temporary (only valid while the Colab notebook is running).**</font>

<font color='red'>**It is for development/demo purposes only.**</font>

<font color='red'>**For production deployment, use a paid ngrok plan or host the API on a dedicated server (AWS, GCP, Azure, etc.).**</font>

<font color='red'>**The Hugging Face token only works if the user has accepted Meta’s license agreement for the model on the Hugging Face Hub.**</font>

In [None]:
!pip install pyngrok  bitsandbytes -U

Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyngrok, bitsandbytes
Successfully installed bitsandbytes-0.47.0 pyngrok-7.3.0


In [None]:
import os, time, threading, re
import torch
from transformers import pipeline, AutoTokenizer
from fastapi import FastAPI, Request
from pyngrok import ngrok
import nest_asyncio, uvicorn

In [None]:
import getpass
import os

# Ask for the Hugging Face token without echoing it, and keep it in the
# process environment under HF_TOKEN.
os.environ.update(HF_TOKEN=getpass.getpass("Paste your Hugging Face token (hidden): "))

In [None]:
# Patch asyncio to permit nested event loops. NOTE(review): presumably needed
# because the notebook kernel already runs an event loop — confirm it is still
# required given that uvicorn is started in its own thread.
nest_asyncio.apply()

In [None]:
# Read the token back from the environment; strip stray whitespace/newlines
# that often come along when a token is pasted.
HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
# Never echo the secret itself: notebook outputs are saved and shared with the
# notebook. Report only whether a token is present.
print("HF_TOKEN is set." if HF_TOKEN else "WARNING: HF_TOKEN is empty.")

In [None]:
# Closed set of moderation categories the classifier is allowed to return.
valid_labels = [
    "explicit_nudity",
    "suggestive",
    "violence",
    "disturbing_content",
    "rude_gestures",
    "alcohol",
    "drugs",
    "tobacco",
    "hate_speech",
    "safe",
]
# The same labels rendered as a single-quoted, comma-separated string.
labels_for_prompt = ", ".join(map("'{}'".format, valid_labels))
print(labels_for_prompt)

In [None]:
# Probe the accelerator: "cuda" with the name of GPU 0 when CUDA is available,
# otherwise "cpu" with the placeholder name "CPU".
if torch.cuda.is_available():
  device, gpu_name = "cuda", torch.cuda.get_device_name(0)
else:
  device, gpu_name = "cpu", "CPU"
# bf16 is switched on only when the GPU name contains one of these substrings.
use_bf16 = device == "cuda" and (
  "A100" in gpu_name or "H100" in gpu_name or "L4" in gpu_name
)
print(f"Device: {device} | GPU: {gpu_name} | bf16: {use_bf16}")

In [None]:
def make_generator():
  """Build the Llama-3.1-8B-Instruct text-generation pipeline for this machine.

  When the GPU name contains "A100", "H100" or "L4" the model is loaded in
  bfloat16. Otherwise it is loaded with 4-bit NF4 quantization through
  bitsandbytes.

  Returns:
    The object returned by `transformers.pipeline("text-generation", ...)`.
  """
  # Imported locally so this function carries its own dependency.
  from transformers import BitsAndBytesConfig

  model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

  device = "cuda" if torch.cuda.is_available() else "cpu"
  gpu_name = torch.cuda.get_device_name(0) if device == "cuda" else "CPU"

  use_bf16 = device == "cuda" and any(k in gpu_name for k in ["A100","H100","L4"])
  print(f"Device: {device} | GPU: {gpu_name} | bf16: {use_bf16}")

  if use_bf16:
    model_kwargs = {"torch_dtype": torch.bfloat16}
  else:
    # 4-bit quantization for T4 / small GPUs. The quantization options are
    # passed as an explicit BitsAndBytesConfig via `quantization_config`
    # rather than as bare `load_in_4bit` / `bnb_4bit_*` keyword arguments.
    # NOTE(review): this branch is also taken on CPU, where the notebook's own
    # instructions say the model will not load — confirm whether a clear error
    # should be raised instead.
    model_kwargs = {
      "quantization_config": BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_quant_type="nf4",
      )
    }

  return pipeline(
    "text-generation",
    model=model_id,
    model_kwargs=model_kwargs,
    device_map="auto",
    token=HF_TOKEN,
  )

In [None]:
generator = make_generator()

In [None]:
# ---- Helpers ----
def normalize_labels(generated: str, labels=None) -> str:
  """Reduce raw model output to a clean, comma-separated list of known labels.

  The text is lower-cased and split on commas, semicolons and newlines. Each
  piece is stripped of surrounding quotes, brackets, bullets and dots, and
  inner spaces/hyphens become underscores (so "Hate Speech" and "hate-speech"
  both map to "hate_speech"). Only pieces that exactly match an allowed label
  are kept.

  Args:
    generated: Raw text produced by the model.
    labels: Optional iterable of allowed label names. Defaults to the
      module-level `valid_labels`.

  Returns:
    The matched labels, de-duplicated and sorted alphabetically, joined with
    ", ". "safe" is dropped when any other label matched, because it means
    "no category applies". Returns "safe" when nothing valid is detected.
  """
  allowed = set(valid_labels if labels is None else labels)
  picked = set()
  for part in re.split(r"[,;\n]+", generated.lower()):
    token = re.sub(r"[\s\-]+", "_", part.strip(" \t'\"`.*-[]()"))
    if token in allowed:
      picked.add(token)
  # "safe" alongside a real category is contradictory; the flagged category wins.
  if len(picked) > 1:
    picked.discard("safe")
  return ", ".join(sorted(picked)) if picked else "safe"

In [None]:
def classify(text: str) -> str:
  """Ask the LLM which moderation categories apply to `text`.

  Builds a system + user chat prompt listing the allowed categories, generates
  up to 40 new tokens, prints the raw reply, and returns it after cleaning
  with `normalize_labels`.

  Args:
    text: The text to classify.

  Returns:
    The string returned by `normalize_labels` for the model's reply.
  """
  system_prompt = (
    "You are an expert content moderator. Your task is to identify ALL "
    f"applicable categories for the user's text from the following list: {labels_for_prompt}. "
    "Your response MUST be a comma-separated list of the category names. "
    "If none of the categories apply, respond with only the word 'safe'."
  )
  messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": f'Please classify the following text: "{text}"'},
  ]
  # Greedy decoding: a classifier should give the same labels for the same
  # input, so sampling is disabled explicitly instead of relying on the
  # model's default generation settings.
  outputs = generator(
    messages,
    max_new_tokens=40,
    return_full_text=False,
    do_sample=False,
  )
  raw = outputs[0]["generated_text"].strip()
  print(raw)  # raw model reply, shown in the notebook output for debugging
  return normalize_labels(raw)

In [None]:
# ---- FastAPI app ----
app = FastAPI()


def _as_dict(value):
  """Return `value` when it is a dict, else an empty dict (tolerates null or wrong-typed JSON nodes)."""
  return value if isinstance(value, dict) else {}


def _error_response(path: str, message: str) -> dict:
  """Build the CATALOG-ERROR payload shared by every failure path."""
  return {
    "type": "CATALOG-ERROR",
    "code": "999999",
    "path": path,
    "message": message,
    "test_type": "recommendation",
  }


def _provider_text(provider) -> str:
  """Join one provider's descriptor `text`, `long_desc` and `short_desc` into a single string."""
  desc = _as_dict(_as_dict(provider).get("descriptor"))
  text_field = desc.get("text") or []
  # A bare string is accepted as a one-item list; any other non-list is ignored.
  if isinstance(text_field, str):
    text_field = [text_field]
  elif not isinstance(text_field, (list, tuple)):
    text_field = []
  pieces = list(text_field) + [desc.get("long_desc"), desc.get("short_desc")]
  # Skip non-string entries (null, numbers, objects) instead of failing the join.
  return " ".join(p for p in pieces if isinstance(p, str)).strip()


@app.get("/health")
def health():
  """Liveness probe: always returns {"status": "ok"}."""
  return {"status": "ok"}


@app.post("/classify")
async def classify_content(request: Request):
  """Classify the text of the first provider in the posted catalog JSON.

  Reads `message.catalog["bpp/providers"][0].descriptor` from the request
  body, concatenates its `text`, `long_desc` and `short_desc` fields, and
  runs `classify` on the result.

  Returns:
    A dict with `type` "CATALOG" and the labels in `message` on success, or a
    `type` "CATALOG-ERROR" dict when no provider or no text is found, or when
    any exception occurs. Errors are reported in the body; no exception is
    raised to the framework.
  """
  try:
    data = _as_dict(await request.json())
    # Every level is null-tolerant, so {"message": null} yields the
    # "No providers" error rather than a generic exception message.
    catalog = _as_dict(_as_dict(data.get("message")).get("catalog"))
    providers = catalog.get("bpp/providers")
    if not isinstance(providers, list) or not providers:
      return _error_response(
        "message.catalog.bpp/providers",
        "No providers found in input JSON",
      )

    # Only the first provider is analysed.
    text_to_analyze = _provider_text(providers[0])
    if not text_to_analyze:
      return _error_response(
        "message.catalog.bpp/providers[0].descriptor",
        "No text available for classification",
      )

    # NOTE: classify() is a synchronous call inside this coroutine, so the
    # event loop serves no other request until it returns.
    # perf_counter() is monotonic, unlike wall-clock time.time().
    t0 = time.perf_counter()
    result = classify(text_to_analyze)
    elapsed = time.perf_counter() - t0
    print(result)

    return {
      "type": "CATALOG",
      "code": "111111",
      "path": "message.catalog.bpp/providers[0].descriptor.text",
      "message": result,
      "test_type": "recommendation",
      "inference_time": f"{elapsed:.2f} seconds",
    }

  except Exception as e:
    # Top-level boundary: report any failure (bad JSON, model error) in-band.
    return _error_response("unknown", f"Exception: {e}")

In [None]:
def run_uvicorn():
  """Serve `app` with uvicorn on 0.0.0.0:8000 at "info" log level."""
  server_options = {"host": "0.0.0.0", "port": 8000, "log_level": "info"}
  uvicorn.run(app, **server_options)


# Launch the API server on a background daemon thread so the notebook cell
# returns immediately and later cells can call the API.
server_thread = threading.Thread(target=run_uvicorn, daemon=True)
server_thread.start()

In [None]:
# Public URL of the ngrok tunnel; stays None when the tunnel cannot be opened.
public_url = None
os.environ["ngrok_TOKEN"] = getpass.getpass("Paste your ngrok token (hidden): ")
# Strip stray whitespace/newlines that often come along with a pasted token.
NGROK_AUTH = os.environ.get("ngrok_TOKEN", "").strip()
# Never echo the secret itself: notebook outputs are saved and shared with the
# notebook. Report only whether a token is present.
print("ngrok token is set." if NGROK_AUTH else "WARNING: ngrok token is empty.")
try:
  if NGROK_AUTH:
    ngrok.set_auth_token(NGROK_AUTH)
  # Expose the local API server (port 8000) over HTTP.
  public_url = ngrok.connect(8000, "http").public_url
  print("Public URL:", public_url)
except Exception as e:
  # Best effort: the API remains reachable locally without the tunnel.
  print("Ngrok not started (you can ignore this if you don't need external access):", e)

### **Trying the API**

In [None]:
import requests

# Sample request body: one provider whose descriptor carries the text to classify.
sample_provider = {
  "descriptor": {
    "text": [
      "He was drunk. He drew his sword and charged, intending to cut them down where they stood."
    ],
    "long_desc": "",
    "short_desc": "",
  },
  "id": "1268365919",
}
payload = {"message": {"catalog": {"bpp/providers": [sample_provider]}}}

# Call the API through the local server first.
r_local = requests.post("http://127.0.0.1:8000/classify", json=payload, timeout=60)
print("Local response:", r_local.json())

# Then through the ngrok tunnel, but only when a public URL was obtained.
if globals().get("public_url"):
    r_public = requests.post("{}/classify".format(public_url), json=payload, timeout=60)
    print("Public response:", r_public.json())
