In [None]:
# Upgrade the inference stack (transformers, accelerate, bitsandbytes, torch, torchvision) with pip via a shell escape.
# NOTE(review): the captured output shows torchvision being replaced inside a live kernel; a runtime restart
# before running the import cell is probably required — confirm.
!pip install "transformers>=4.43.0" "accelerate>=0.33.0" "bitsandbytes>=0.43.0" torch torchvision --upgrade

Collecting torchvision
  Downloading torchvision-0.23.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Downloading torchvision-0.23.0-cp311-cp311-manylinux_2_28_x86_64.whl (8.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m65.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchvision
  Attempting uninstall: torchvision
    Found existing installation: torchvision 0.21.0+cu124
    Uninstalling torchvision-0.21.0+cu124:
      Successfully uninstalled torchvision-0.21.0+cu124
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.8.0 which is incompatible.[0m[31m
[0mSuccessfully installed torchvision-0.23.0


In [None]:
# Upgrade pip itself, then upgrade transformers, accelerate and bitsandbytes.
# NOTE(review): the second command repeats packages already handled by the earlier install cell — confirm both cells are needed.
!pip install --upgrade pip
!pip install --upgrade "transformers>=4.43.0" accelerate bitsandbytes

Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m26.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
fastai 2.7.19 requires torch<2.7,>=1.10, but you have torch 2.8.0 which is incompatible.[0m[31m
[0mSuccessfully installed pip-25.2


In [None]:
import json
import os
import re
from threading import Thread
from typing import Dict, Any, List, Optional

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer

In [None]:
# Label names the classifier reports on; validate_output iterates this list to build its result,
# so every name here becomes a key of the validated output.
LABELS = [
    "Explicit Nudity Detection",
    "Suggestive Content Detection",
    "Violence Detection",
    "Rude Gestures Detection",
    "Alcohol Content Detection",
    "Drugs Content Detection",
    "Tobacco Content Detection",
    "Hate Symbol Detection",
]

# System message placed first in the chat that build_prompt assembles.
SYSTEM_PROMPT = """You are a careful, literal content-safety classifier.
Return ONLY valid JSON that exactly matches the requested schema.
Do not add markdown fences, comments, or extra text."""

# User-message template; build_prompt fills it with INSTRUCTION_TEMPLATE.format(text=...).
# Literal JSON braces are doubled ({{ and }}) so that str.format emits single braces;
# {text} is the only substitution field.
# NOTE(review): the label bullets and the schema example repeat the names in LABELS by hand —
# keep the three in sync when a label is added or renamed.
INSTRUCTION_TEMPLATE = """Classify the INPUT text into the following labels.

Labels (binary flags):
- Explicit Nudity Detection
- Suggestive Content Detection
- Violence Detection
- Rude Gestures Detection
- Alcohol Content Detection
- Drugs Content Detection
- Tobacco Content Detection
- Hate Symbol Detection

Rules:
- Output MUST be a single JSON object.
- For each label, set "flag" to 0 (absent) or 1 (present).
- Provide a brief "rationale" (<= 20 words) explaining why or stating absence.
- Keep rationales neutral and professional.
- Do not include any keys not specified in the schema.

Schema (example with all zeros):
{{
  "Explicit Nudity Detection":    {{"flag": 0, "rationale": "No sexual body parts or explicit acts."}},
  "Suggestive Content Detection": {{"flag": 0, "rationale": "No flirtatious or implicitly sexual wording."}},
  "Violence Detection":           {{"flag": 0, "rationale": "No harm, threats, or injury described."}},
  "Rude Gestures Detection":      {{"flag": 0, "rationale": "No obscene gestures mentioned."}},
  "Alcohol Content Detection":    {{"flag": 0, "rationale": "No alcohol references."}},
  "Drugs Content Detection":      {{"flag": 0, "rationale": "No illicit or prescription drug misuse."}},
  "Tobacco Content Detection":    {{"flag": 0, "rationale": "No tobacco references."}},
  "Hate Symbol Detection":        {{"flag": 0, "rationale": "No hate symbols or extremist insignia."}}
}}

INPUT:
\"\"\"{text}\"\"\"
"""

In [None]:
# ----------------------------
# Runtime configuration
# ----------------------------

# Model loading
MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"
DEVICE_MAP = "auto"   # Accelerate decides where each layer lives
LOAD_4BIT = True      # switch to False for unquantised weights (needs more VRAM/RAM)

# Text generation
MAX_NEW_TOKENS = 512
TOP_P = 0.9
TEMPERATURE = 0.0     # zero keeps classification deterministic

# Weight / compute dtype: bfloat16 when CUDA is present, float32 otherwise
if torch.cuda.is_available():
  TORCH_DTYPE = torch.bfloat16
else:
  TORCH_DTYPE = torch.float32

In [None]:
# Hugging Face access token, read from the environment so that no credential is stored in the notebook.
# When HF_TOKEN is unset this is None and from_pretrained falls back to the locally cached login.
hf_token = os.environ.get("HF_TOKEN")


def load_model():
  """Load the causal LM and its tokenizer for MODEL_ID.

  Weights are placed according to DEVICE_MAP and loaded as TORCH_DTYPE. When
  LOAD_4BIT is set and CUDA is available, the weights are quantised to 4-bit
  NF4 (double quantisation, compute in TORCH_DTYPE) through an explicit
  ``BitsAndBytesConfig`` passed as ``quantization_config``.

  Returns:
    A ``(model, tokenizer)`` tuple.
  """
  kwargs = dict(
    device_map=DEVICE_MAP,
    torch_dtype=TORCH_DTYPE,
  )
  if LOAD_4BIT and torch.cuda.is_available():
    # Quantisation options belong in a BitsAndBytesConfig; passing them as
    # loose from_pretrained keyword arguments is the deprecated form.
    kwargs["quantization_config"] = BitsAndBytesConfig(
      load_in_4bit=True,
      bnb_4bit_use_double_quant=True,
      bnb_4bit_quant_type="nf4",
      bnb_4bit_compute_dtype=TORCH_DTYPE,
    )
  model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=False,
    token=hf_token,
    **kwargs,
  )
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True, token=hf_token)
  return model, tokenizer

In [None]:
def build_prompt(text: str, tokenizer: AutoTokenizer) -> str:
  """Render the classification request as a single chat-formatted prompt string.

  Args:
    text: Raw input to classify; substituted into INSTRUCTION_TEMPLATE.
    tokenizer: Tokenizer whose chat template formats the messages.

  Returns:
    The prompt as plain text (``tokenize=False``) with the generation prompt
    appended (``add_generation_prompt=True``).
  """
  # System message first, then the filled-in user instruction.
  messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": INSTRUCTION_TEMPLATE.format(text=text)},
  ]
  return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

In [None]:
def generate_json(model, tokenizer, prompt: str) -> str:
  """Generate the model's reply to ``prompt`` and return it as stripped text.

  Args:
    model: Causal LM exposing ``generate`` and ``device``.
    tokenizer: Tokenizer matching ``model``.
    prompt: Fully rendered chat prompt, as produced by ``build_prompt``.

  Returns:
    Only the newly generated text (prompt tokens and special tokens removed),
    with surrounding whitespace stripped.
  """
  # The prompt comes out of the chat template, which already wrote the special
  # tokens as text; letting the tokenizer add them again would duplicate BOS.
  inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)

  sampling = TEMPERATURE > 0.0
  gen_kwargs = dict(
    max_new_tokens=MAX_NEW_TOKENS,
    do_sample=sampling,
    eos_token_id=tokenizer.eos_token_id,
  )
  if sampling:
    # Sampling knobs are only meaningful (and only forwarded) when sampling is on.
    gen_kwargs.update(temperature=TEMPERATURE, top_p=TOP_P)

  # Run synchronously: the whole reply is needed before it can be parsed, and a
  # direct call lets any generation error surface here instead of leaving a
  # streamer consumer waiting on a worker thread that has already died.
  output_ids = model.generate(**inputs, **gen_kwargs)

  # generate() returns prompt + completion; keep the completion only.
  prompt_len = inputs["input_ids"].shape[-1]
  completion = tokenizer.decode(output_ids[0][prompt_len:], skip_special_tokens=True)
  return completion.strip()

In [None]:
def extract_json(text: str) -> Optional[str]:
  """Pull a JSON object out of the model text, tolerating surrounding chatter.

  Every ``{`` is tried as the start of a JSON object; among the spans that
  actually decode, the longest one is returned (the first wins on ties). This
  keeps trailing prose or markdown fences that happen to contain braces from
  corrupting the result.

  If no span decodes, the function falls back to the text between the first
  ``{`` and the last ``}`` so the caller can still see or report it.

  Args:
    text: Raw model output.

  Returns:
    The candidate JSON substring, or None when ``text`` contains no
    ``{...}`` span at all.
  """
  decoder = json.JSONDecoder()
  best = None
  pos = text.find("{")
  while pos != -1:
    try:
      obj, end = decoder.raw_decode(text, pos)
    except json.JSONDecodeError:
      # Not a valid object at this brace; try the next one.
      pos = text.find("{", pos + 1)
      continue
    if isinstance(obj, dict) and (best is None or end - pos > len(best)):
      best = text[pos:end]
    # Skip past the decoded object so its nested braces are not re-tried.
    pos = text.find("{", end)
  if best is not None:
    return best

  # Nothing decoded: hand back the widest brace-delimited span, if any.
  match = re.search(r"\{.*\}", text, flags=re.S)
  return match.group(0) if match else None

In [None]:
def _coerce_flag(value: Any) -> int:
  """Map a model-supplied flag value onto 0 or 1."""
  # bool is checked first: JSON true/false arrive as Python bools.
  if isinstance(value, bool):
    return int(value)
  if isinstance(value, (int, float)):
    return 1 if value == 1 else 0
  if isinstance(value, str):
    return 1 if value.strip().lower() in {"1", "1.0", "true"} else 0
  return 0


def validate_output(obj: Any) -> Dict[str, Any]:
  """Normalise parsed model output to one entry per label in LABELS.

  Each entry has the form ``{"flag": 0|1, "rationale": str}``. Labels that are
  missing, or whose value is not a dict, get flag 0 and the rationale
  "Not mentioned.". Keys that are not in LABELS are dropped.

  Args:
    obj: Parsed JSON from the model; anything that is not a dict is treated
      as having no labels.

  Returns:
    A dict keyed by every label in LABELS.
  """
  source = obj if isinstance(obj, dict) else {}
  result = {}
  for label in LABELS:
    entry = source.get(label)
    if not isinstance(entry, dict):
      result[label] = {"flag": 0, "rationale": "Not mentioned."}
      continue
    rationale = entry.get("rationale")
    if rationale is None:
      # Covers both a missing key and an explicit JSON null.
      rationale = "No rationale provided."
    elif not isinstance(rationale, str):
      rationale = str(rationale)
    result[label] = {
      "flag": _coerce_flag(entry.get("flag", 0)),
      "rationale": rationale[:160],  # cap the length of stored rationales
    }
  return result

In [None]:
def _parse_model_json(raw: str):
  """Parse one raw model reply; return ``(parsed, None)`` or ``(None, error)``."""
  jtxt = extract_json(raw)
  if jtxt is None:
    return None, ValueError(f"Model did not return JSON. Raw output:\n{raw}")
  try:
    return json.loads(jtxt), None
  except json.JSONDecodeError as err:
    return None, err


def classify_text(text: str, model=None, tokenizer=None, retry_on_parse_error: bool = True) -> Dict[str, Any]:
  """Classify ``text`` against LABELS and return the validated result.

  Args:
    text: Input text to classify.
    model: Loaded model; when it or ``tokenizer`` is None both are loaded
      with ``load_model()`` and released again before returning.
    tokenizer: Tokenizer matching ``model``.
    retry_on_parse_error: When True, a reply that contains no JSON object or
      whose JSON fails to decode triggers one more generation with a
      stricter reminder.

  Returns:
    ``{"raw_text": text, "model_output": <validated label dict>}``.

  Raises:
    ValueError: The final reply contained no JSON object.
    json.JSONDecodeError: The final reply contained malformed JSON.
  """
  owned = model is None or tokenizer is None
  if owned:
    model, tokenizer = load_model()
  try:
    raw = generate_json(model, tokenizer, build_prompt(text, tokenizer))
    parsed, error = _parse_model_json(raw)

    if error is not None and retry_on_parse_error:
      # One gentle retry with a stricter reminder. The reminder is appended to
      # the input text, so it ends up inside the prompt's quoted INPUT section.
      retry_prompt = build_prompt(
        text + "\n\nREMINDER: Output ONLY valid JSON object. No backticks. No extra text.",
        tokenizer,
      )
      raw = generate_json(model, tokenizer, retry_prompt)
      parsed, error = _parse_model_json(raw)

    if error is not None:
      raise error

    return {
      "raw_text": text,
      "model_output": validate_output(parsed),
    }
  finally:
    # Release memory only when the model was created inside this call.
    if owned:
      try:
        del model, tokenizer
        if torch.cuda.is_available():
          torch.cuda.empty_cache()
      except Exception:
        # Best-effort cleanup: a failure here must not mask the result or the
        # exception already propagating out of the try block.
        pass

In [None]:



# ----------------------------
# Example usage
# ----------------------------
# Runs when this code executes as the main module: classifies a handful of
# sample sentences and prints each result as indented JSON.
if __name__ == "__main__":
  # Sample inputs to classify.
  sample_texts = [
    "They were clinking beer bottles at the bar after work.",
    "The poster shows a skull with a swastika armband.",
    "He threatened to punch me if I didn’t pay.",
    "She winked and said the dress is 'barely decent'.",
    "The ad shows a man flipping the bird.",
    "He rolled a joint and lit it.",
    "Smoking a pack a day is terrible for your lungs.",
    "The content describes explicit sexual acts and exposed genitals.",
  ]

  # Load once and pass the pair in, so classify_text does not call load_model() for every sample.
  model, tokenizer = load_model()
  for t in sample_texts:
    res = classify_text(t, model, tokenizer)
    # ensure_ascii=False keeps non-ASCII characters (e.g. the curly apostrophe above) readable in the output.
    print(json.dumps(res, indent=2, ensure_ascii=False))


ValueError: Could not find LlamaForCausalLM neither in <module 'transformers.models.llama' from '/usr/local/lib/python3.11/dist-packages/transformers/models/llama/__init__.py'> nor in <module 'transformers' from '/usr/local/lib/python3.11/dist-packages/transformers/__init__.py'>!