In [5]:

# ELECTRA SUICIDAL TEXT DETECTOR — TESTING NOTEBOOK


import torch
from transformers import ElectraForSequenceClassification, ElectraTokenizer
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import os
import warnings
warnings.filterwarnings('ignore')


# MOUNT GOOGLE DRIVE
# The saved model is read from Drive further down, so Drive has to be attached
# to the Colab filesystem first.

from google.colab import drive
drive.mount(mountpoint='/content/drive')


# LOAD SAVED MODEL & TOKENIZER
print("=" * 90)
print("🔹 LOADING ELECTRA MODEL FROM DRIVE")
print("=" * 90)

# Folder on Drive that holds the saved checkpoint files.
model_dir = "/content/drive/MyDrive/electra_suicidal_text_detector"

# Fail fast with a clear message. isdir() (rather than exists()) also rejects a
# stray regular file at this path, which would otherwise crash in os.listdir().
if not os.path.isdir(model_dir):
    raise FileNotFoundError(f"✗ ERROR: Model directory not found at {model_dir}")

print(f"\n✓ Model directory found: {model_dir}")
print("✓ Files in directory:")
# Sorted so the listing is deterministic across runs and filesystems.
for file in sorted(os.listdir(model_dir)):
    print(f"  - {file}")

# Load tokenizer and model strictly from the local folder (no Hub lookup).
tokenizer = ElectraTokenizer.from_pretrained(model_dir, local_files_only=True)
model = ElectraForSequenceClassification.from_pretrained(model_dir, local_files_only=True)

# Device setup: prefer the GPU when one is available, otherwise use the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.eval()  # switch the model to evaluation mode for inference

# Label encoder for decoding predictions.
# LabelEncoder sorts its classes, so index 0 -> 'non-suicide', 1 -> 'suicide'.
# NOTE(review): assumes the model was trained with this same class order — confirm.
label_encoder = LabelEncoder()
label_encoder.fit(['non-suicide', 'suicide'])

print("\n✅ Model and tokenizer loaded successfully!")
print(f"Device: {device}")
print("=" * 90)



# PREDICTION FUNCTION
def predict_suicidal_text(text, max_length=128):
    """Predict if text contains suicidal intent.

    Tokenizes ``text``, runs it through the module-level ``model`` on
    ``device`` and turns the resulting logits into class probabilities.

    Args:
        text: The sentence or passage to classify.
        max_length: Number of tokens the input is padded or truncated to
            before it is fed to the model. Defaults to 128, the value that
            used to be hard-coded here.

    Returns:
        dict with the keys:
            'text'          -- the input, unchanged
            'prediction'    -- class name decoded by ``label_encoder``
            'confidence'    -- probability of the predicted class
            'probabilities' -- {'non-suicidal': p(index 0),
                                'suicidal': p(index 1)}
    """
    # Calling the tokenizer directly is the supported API; encode_plus is
    # deprecated in transformers in favour of __call__.
    encoding = tokenizer(
        text,
        add_special_tokens=True,
        max_length=max_length,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)

    with torch.no_grad():
        logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
        # Single example per call: take row 0 and move it to the CPU once,
        # instead of issuing a separate .item() transfer per value.
        probs = torch.softmax(logits, dim=1)[0].cpu()

    predicted_index = int(torch.argmax(probs))
    probabilities = probs.tolist()

    class_name = label_encoder.inverse_transform([predicted_index])[0]

    return {
        'text': text,
        'prediction': class_name,
        'confidence': probabilities[predicted_index],
        'probabilities': {
            'non-suicidal': probabilities[0],
            'suicidal': probabilities[1]
        }
    }



# BATCH PREDICTION ON SAMPLE TEXTS
# Runs the classifier over a fixed list of hand-written sentences, prints a
# per-example report and then a compact summary table.
print("\n🔹 TESTING MODEL ON SAMPLE TEXTS\n")

# The apostrophes below were previously stored as mis-decoded bytes
# ("‚Äô"), which fed garbage tokens to the model; they are real
# characters again so the model sees the intended sentences.
sample_texts = [
    "I am done with this life, I can’t take it anymore.",
    "I feel good today, finally getting things done.",
    "I can't take this pain anymore, nobody cares about me.",
    "It’s a beautiful day outside, I love my family.",
    "i want to jump off the building",
    "i can't keep doing this much longer",
    "I dont see the point of living anymore",
    "I don’t want to live anymore, everything feels pointless.",
    "I’m thinking about ending my life tonight, I can’t handle the pain."
]

# Classify each sentence (numbered from 1) and print a detailed report for it.
results = []
for i, text in enumerate(sample_texts, 1):
    result = predict_suicidal_text(text)
    results.append(result)
    print(f"\n{'─'*80}")
    print(f"Example {i}:")
    print(f"Text: {result['text']}")
    print(f"Prediction: {result['prediction'].upper()} ({result['confidence']:.2%})")
    print(f"→ Non-Suicidal: {result['probabilities']['non-suicidal']:.4f}")
    print(f"→ Suicidal: {result['probabilities']['suicidal']:.4f}")

# Summary table: one row per sentence, text clipped to 60 characters so the
# table stays readable.
summary_df = pd.DataFrame([
    {
        'Text': r['text'][:60] + ('...' if len(r['text']) > 60 else ''),
        'Prediction': r['prediction'].upper(),
        'Confidence': f"{r['confidence']:.2%}",
        'Non-Suicidal': f"{r['probabilities']['non-suicidal']:.4f}",
        'Suicidal': f"{r['probabilities']['suicidal']:.4f}"
    }
    for r in results
])

print("\n\n" + "="*90)
print("🔹 BATCH PREDICTIONS SUMMARY")
print("="*90)
print(summary_df.to_string(index=False))

# # Save results to CSV
# summary_df.to_csv("batch_predictions.csv", index=False)
# print("\n‚úì Predictions saved to 'batch_predictions.csv'")

# INTERACTIVE USER INPUT PREDICTION
# Reads sentences from the user until 'quit' is entered and prints the model's
# verdict for each one.
print("\n" + "="*90)
print("🔹 INTERACTIVE USER INPUT PREDICTION")
print("="*90)
print("Type your own sentence (or 'quit' to exit):\n")

while True:
    try:
        user_input = input("Enter text: ").strip()
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the kernel was interrupted: leave the loop
        # cleanly instead of ending the cell with a traceback.
        print("\nExiting... 👋")
        break

    if user_input.lower() == 'quit':
        print("Exiting... 👋")
        break
    if not user_input:
        # Nothing to classify; ask again.
        print("⚠️ Please type something!")
        continue

    result = predict_suicidal_text(user_input)
    print("\n" + "-"*80)
    print(f"Prediction: {result['prediction'].upper()} ({result['confidence']:.2%})")
    print(f"→ Non-Suicidal: {result['probabilities']['non-suicidal']:.4f}")
    print(f"→ Suicidal: {result['probabilities']['suicidal']:.4f}")
    print("-"*80)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
🔹 LOADING ELECTRA MODEL FROM DRIVE

✓ Model directory found: /content/drive/MyDrive/electra_suicidal_text_detector
✓ Files in directory:
  - config.json
  - model.safetensors
  - special_tokens_map.json
  - tokenizer_config.json
  - vocab.txt

✅ Model and tokenizer loaded successfully!
Device: cpu

🔹 TESTING MODEL ON SAMPLE TEXTS


────────────────────────────────────────────────────────────────────────────────
Example 1:
Text: I am done with this life, I can’t take it anymore.
Prediction: SUICIDE (60.04%)
→ Non-Suicidal: 0.3996
→ Suicidal: 0.6004

‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ