In [7]:
import time
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics.pairwise import cosine_similarity

# Per-platform moderation settings (the entries offered in the drop-down list).
# "threshold" is the toxicity score at or above which a message gets flagged.
PLATFORM_CONFIG = {
    # Gamers talk trash, so tolerance is higher.
    "gaming": {
        "threshold": 0.6,
    },
    # Default platform.
    "social": {
        "threshold": 0.5,
    },
    # Strict.
    "professional": {
        "threshold": 0.3,
    },
    # Very strict.
    "kids": {
        "threshold": 0.15,
    },
}

# Score limits (constants).
# NOTE: SAFE is not referenced by the code visible in this cell.
SAFE: float = 0.4
# A toxicity score at or above UNSAFE is censored outright, whatever the platform.
UNSAFE: float = 0.85

class DetoxifySmart:
    """Embedding-based toxicity filter with optional rewrite suggestions.

    Text is embedded with the ``all-MiniLM-L6-v2`` SentenceTransformer and
    scored by a logistic-regression classifier fitted in
    :meth:`load_and_train`. :meth:`filter_text` then censors, rewrites or
    passes a message through depending on its score and the platform.

    Config keys (all optional):
        enable_suggestions: when True (default), moderately toxic text is
            replaced by the stored neutral rewrite of the most similar
            training sentence.
        min_similarity: minimum cosine similarity required before a stored
            rewrite is offered; defaults to ``MIN_SUGGESTION_SIMILARITY``.
    """

    # Number of sentence pairs taken from the head of the dataset for training.
    TRAINING_ROWS = 4000
    # Default cosine-similarity cut-off below which no rewrite is suggested.
    MIN_SUGGESTION_SIMILARITY = 0.4

    def __init__(self, config=None):
        """Load the sentence embedder and create an unfitted classifier.

        Args:
            config: optional dict merged over the defaults (see class docstring).
        """
        # Only the suggestion switch has a default here; per-request values
        # (such as the platform) are resolved inside filter_text.
        self.config = {
            "enable_suggestions": True
        }
        if config:
            self.config.update(config)

        print("Loading lightweight AI model (all-MiniLM-L6-v2)...")
        self.embedder = SentenceTransformer('all-MiniLM-L6-v2')

        self.model = LogisticRegression(max_iter=1000)

        # Filled by load_and_train() when suggestions are enabled.
        self.df = None
        self.toxic_embeddings = None

    # ----------------------------- Training -----------------------------
    def load_and_train(self):
        """Download the ParaDetox TSV, fit the classifier and build the suggestion store.

        The first column of the file is used as the toxic sentence and the
        second as its neutral rewrite. Rows missing either value are skipped.
        """
        url = "https://raw.githubusercontent.com/s-nlp/paradetox/main/paradetox/paradetox.tsv"
        data = pd.read_csv(url, sep="\t")

        # Keep toxic/rewrite pairs aligned and drop incomplete rows so the
        # embedder never receives NaN instead of text.
        pairs = data.iloc[:, [0, 1]].head(self.TRAINING_ROWS).dropna()
        # encode() is documented for str / list[str], so hand it plain lists
        # rather than pandas Series.
        toxic_samples = pairs.iloc[:, 0].astype(str).tolist()
        safe_samples = pairs.iloc[:, 1].astype(str).tolist()

        # Generate embeddings (happens once at startup).
        X_toxic = self.embedder.encode(toxic_samples)
        X_safe = self.embedder.encode(safe_samples)

        X = np.vstack([X_toxic, X_safe])
        # Label 1 = toxic, label 0 = safe.
        y = np.concatenate([
            np.ones(len(toxic_samples), dtype=int),
            np.zeros(len(safe_samples), dtype=int),
        ])

        self.model.fit(X, y)

        # ----------------------------- Suggestions -----------------------------
        if self.config["enable_suggestions"]:
            self.df = pd.DataFrame({
                "toxic": toxic_samples,
                "suggestion": safe_samples
            })
            # Store the pre-computed vectors; row i matches self.df row i.
            self.toxic_embeddings = X_toxic

        print("System Ready. Context-aware model loaded.")

    def _suggest(self, embedding):
        """Return the stored rewrite of the toxic sentence closest to *embedding*.

        Falls back to a generic flag message when no suggestion store has been
        built or when the best match is not similar enough.
        """
        fallback = "[Comment flagged for toxicity]"
        # Suggestions may be enabled without a store (e.g. the model was trained
        # with suggestions disabled); degrade gracefully instead of crashing.
        if self.toxic_embeddings is None or self.df is None:
            return fallback

        # Semantic search: one row of similarities, one entry per stored sentence.
        sims = cosine_similarity(embedding, self.toxic_embeddings)[0]
        best = int(np.argmax(sims))

        min_similarity = self.config.get(
            "min_similarity", self.MIN_SUGGESTION_SIMILARITY
        )
        # Only suggest when we are somewhat confident in the match.
        if sims[best] > min_similarity:
            return self.df.iloc[best]["suggestion"]
        return fallback

    # ----------------------------- Filter API -----------------------------
    def filter_text(self, payload):
        """Score one message and return the moderated result.

        Args:
            payload: dict (the JSON input) with optional keys ``"text"`` and
                ``"platform"``; the platform defaults to ``"social"``.

        Returns:
            dict with keys ``original_text``, ``correct_text``, ``toxicity``
            (rounded to 4 places), ``severity`` (``"SAFE"``, ``"MODERATE"`` or
            ``"SEVERE"``) and ``latencyinms``.
        """
        # perf_counter is monotonic, so the measured latency cannot be skewed
        # by system clock adjustments the way time.time() can.
        start = time.perf_counter()

        # A missing field or a JSON null both become the empty string.
        text = payload.get("text") or ""
        platform = payload.get("platform", "social")

        # Unknown platforms fall back to the default threshold of 0.5.
        current_threshold = PLATFORM_CONFIG.get(platform, {}).get("threshold", 0.5)

        embedding = self.embedder.encode([text])

        # The classifier is fitted with labels 0 (safe) and 1 (toxic), so
        # column 1 of predict_proba is the toxic probability.
        score = float(self.model.predict_proba(embedding)[0][1])

        if score >= UNSAFE:
            severity = "SEVERE"
            corrected = "* [Censored] *"
        elif score >= current_threshold:
            severity = "MODERATE"
            if self.config["enable_suggestions"]:
                corrected = self._suggest(embedding)
            else:
                corrected = "[Hidden]"
        else:
            severity = "SAFE"
            corrected = text

        # Elapsed time since filter_text was entered, in milliseconds.
        latency = round((time.perf_counter() - start) * 1000, 2)

        return {
            "original_text": text,
            "correct_text": corrected,
            "toxicity": round(score, 4),
            "severity": severity,
            "latencyinms": latency
        }

# Demo run: train once, then push a couple of sample payloads through the filter.
if __name__ == "__main__":
    bot = DetoxifySmart()
    bot.load_and_train()

    demo_payloads = (
        # Test 1: direct toxicity.
        {"text": "You are so stupid and useless", "platform": "professional"},
        # Test 2: slang/context (original note: TF-IDF usually fails here, embeddings work).
        {"text": "shut your mouth", "platform": "kids"},
    )
    for case_number, demo_payload in enumerate(demo_payloads, start=1):
        print(f"\nTest {case_number}:", bot.filter_text(demo_payload))

Loading lightweight AI model (all-MiniLM-L6-v2)...
System Ready. Context-aware model loaded.

Test 1: {'original_text': 'You are so stupid and useless', 'correct_text': 'I just ignore you', 'toxicity': 0.8059, 'severity': 'MODERATE', 'latencyinms': 48.87}

Test 2: {'original_text': 'shut your mouth', 'correct_text': 'you have no strength', 'toxicity': 0.4799, 'severity': 'MODERATE', 'latencyinms': 33.57}
