<a href="https://colab.research.google.com/github/olorunfemibabalola/Bias-Detection-NLP/blob/main/Policy_Bias_Detector.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''
Import modules
Import the dataset
Perform exploratory data analysis
Clean the data
Split into training and testing sets
Create a model
Train the model
Make predictions
Test the model
Evaluate the model
Make predictions on new data
Persist the model for future use
Load a persisted model
Make predictions on new data
'''

--------------------------------------------------------------------------------



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import fileinput
from transformers import pipeline

In [None]:
from google.colab import files

# Ask how the content will be supplied and keep asking until the answer is one
# of the two supported modes. The answer is normalised (lower-cased, stripped)
# before it is compared.
while True:
    userChoice = input("Would you like to input a text or a file? (text/file)").lower().strip()
    if userChoice in ("text", "file"):
        break
    print("Invalid input. Please enter 'text' or 'file'")
print(f"You have selected {userChoice}")

# Define both possible results up front so that later cells see a definite
# value (None when unused) instead of raising NameError or silently reusing a
# value left over from an earlier run of this cell.
userText = None
filename = None

if userChoice == "text":
    userText = input("Enter your text:")
else:
    print("Upload your file below:")
    # This creates the "Upload" button
    uploadedFile = files.upload()

    # `uploadedFile` is a dictionary:
    # Key = filename, Value = file data (bytes)
    if not uploadedFile:
        print("No file was uploaded. Re-run this cell and choose a file.")
    # After the loop `filename` holds the name of the last uploaded file.
    for filename in uploadedFile:
        print(f'User uploaded file "{filename}" with a length of {len(uploadedFile[filename])} bytes')




In [None]:
from PyPDF2 import PdfReader

# `filename` is expected to have been set by the upload cell above (the name
# of the last uploaded file). PdfReader receives that name and opens the
# document itself.
reader = PdfReader(filename)

# Collect the text of every page into one string. A page may yield no text
# (the same case extract_text_from_pdf guards against further down), so fall
# back to "" rather than failing on the concatenation. Joining once also
# avoids rebuilding the string on every page.
user_file = "".join(page.extract_text() or "" for page in reader.pages)

# Display a portion of the extracted text to verify
print(user_file[:1000])
print(f"Total characters extracted: {len(user_file)}")

In [None]:
# Print the complete extracted text (the extraction cell only previews the
# first 1000 characters).
print(user_file)

In [None]:
# Strings are immutable: str.lower() returns a new string, so the result has
# to be assigned back for the lower-casing to take effect.
user_file = user_file.lower()

# Naive sentence split: break the text on every full stop.
new = user_file.split(".")

In [None]:
# !pip install -q transformers torch

import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer

# Step 1: Build the bias auditor.
# A Hugging Face text-classification pipeline backed by the
# "valurank/distilroberta-bias" checkpoint. It is placed on CUDA device 0 when
# CUDA is available; otherwise device -1 is passed.
auditor = pipeline(
    task="text-classification",
    model="valurank/distilroberta-bias",
    device=(-1 if not torch.cuda.is_available() else 0),
)

# Step 2: Load the GPT-2 tokenizer and causal language model that generate
# the chat replies (tokenizer first, then the model weights).
chat_model_name = "openai-community/gpt2"
chat_tokenizer, chat_model = (
    AutoTokenizer.from_pretrained(chat_model_name),
    AutoModelForCausalLM.from_pretrained(chat_model_name),
)

def get_chatbot_response(prompt):
    """Audit *prompt* for bias, then either warn the user or generate a reply.

    Args:
        prompt: The user's message as a string.

    Returns:
        A bias-alert message when the auditor labels the prompt as biased
        with a score above 0.7; otherwise text sampled from the chat model.
    """
    # Perform the bias audit FIRST. truncation=True keeps very long prompts
    # within the classifier's maximum input length.
    audit_result = auditor(prompt, truncation=True)[0]

    # Compare the label case-insensitively so the check still fires if the
    # checkpoint spells its label "BIASED" rather than "Biased".
    is_biased = audit_result['label'].lower() == 'biased'
    if is_biased and audit_result['score'] > 0.7:
        return ("⚠️ [BIAS ALERT]: I detected potential bias in your prompt. "
                "Please rephrase to be more inclusive.")

    # Otherwise, generate a standard response.
    inputs = chat_tokenizer.encode(prompt + chat_tokenizer.eos_token, return_tensors='pt')
    outputs = chat_model.generate(
        inputs,
        # Budget the *generated* tokens only; max_length would also count the
        # prompt, leaving little or no room for a reply to a long prompt.
        max_new_tokens=100,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        # GPT-2 defines no pad token; reuse EOS explicitly.
        pad_token_id=chat_tokenizer.eos_token_id,
    )
    # The output sequence starts with the prompt tokens; decode only what the
    # model added so the reply does not echo the user's message.
    new_tokens = outputs[0][inputs.shape[-1]:]
    return chat_tokenizer.decode(new_tokens, skip_special_tokens=True)

# Step 3: Interactive Chat Loop
# Reads one message per iteration until the user types 'quit' or 'exit'.
print("🛡️ Inclusive Assistant Active. Type 'quit' to stop.")
while True:
    try:
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop chatting
        # instead of surfacing a traceback.
        break

    if user_input.lower() in ('quit', 'exit'):
        break
    if not user_input:
        # Nothing to audit or answer; ask again.
        continue

    response = get_chatbot_response(user_input)
    print(f"Bot: {response}\n")


In [None]:
# Ensure you have the necessary libraries installed
# !pip install -q PyPDF2 gradio transformers torch

import PyPDF2
import gradio as gr

def extract_text_from_pdf(pdf_file):
    """Return the text of every page of an uploaded PDF as a single string.

    The document is opened through ``pdf_file.name``. Pages that yield no
    text are skipped; every page that does yield text is followed by one
    space in the result.
    """
    pdf = PyPDF2.PdfReader(pdf_file.name)
    page_texts = (page.extract_text() for page in pdf.pages)
    return "".join(chunk + " " for chunk in page_texts if chunk)

def audit_document(file):
    """Scan an uploaded PDF and report the sentences flagged as biased.

    Args:
        file: The uploaded file object handed over by the UI, or None when
            nothing has been uploaded.

    Returns:
        A report string: a notice when no file was given or no text could be
        extracted, a confirmation when nothing was flagged, or a bulleted
        list of the flagged sentences with their confidence scores.
    """
    if file is None:
        return "No file uploaded."

    text = extract_text_from_pdf(file)
    if not text.strip():
        # Without any text there is nothing to audit; saying "no bias found"
        # here would wrongly certify the document as safe.
        return "⚠️ No readable text could be extracted from the document, so it could not be audited."

    biased_findings = []
    for sentence in text.split('.'):
        # Collapse line breaks and runs of whitespace left over from the PDF
        # layout so each sentence is classified and reported on one line.
        clean_sentence = " ".join(sentence.split())
        if len(clean_sentence) <= 10:  # Filter out noise
            continue

        # truncation=True keeps over-long fragments within the classifier's
        # maximum input length.
        result = auditor(clean_sentence, truncation=True)[0]
        # Case-insensitive label check so "BIASED" and "Biased" both match.
        if result['label'].lower() == 'biased' and result['score'] > 0.75:
            biased_findings.append(f"• \"{clean_sentence}\" (Confidence: {result['score']:.2f})")

    if not biased_findings:
        return "✅ No biased statements found. The document is safe and inclusive."
    return "⚠️ Potential bias detected in the following statements:\n\n" + "\n".join(biased_findings)

# Updated Chatbot Logic with "Document Upload" Intent
def chatbot_response(message, history):
    """Answer one chat message for the chat interface.

    Args:
        message: The text the user just sent.
        history: Previous conversation turns supplied by the chat interface;
            not used by this function.

    Returns:
        A pointer to the upload feature when the message mentions uploading
        or scanning a document, a bias alert when the auditor labels the
        message as biased, or a confirmation otherwise.
    """
    message_lower = message.lower()

    # Feature 1: Check for Document Intent (substring match on the keywords).
    if any(word in message_lower for word in ("upload", "document", "file", "scan")):
        return "Sure! Please use the **Upload File** button below to provide your document for a bias audit."

    # Feature 2: Standard Chat Audit. truncation=True keeps long messages
    # within the classifier's maximum input length.
    res = auditor(message, truncation=True)[0]
    # Case-insensitive label check so "BIASED" and "Biased" both match.
    if res['label'].lower() == 'biased':
        return "⚠️ BIAS ALERT: I detected potential bias in your input. Try using neutral phrasing."

    return "✅ Inclusive input. I am ready to help with your policy questions."

# UI Layout (Using Gradio Blocks for better control)
# Two tabs: a chat tab driven by chatbot_response and a document tab that
# runs audit_document on an uploaded file when the button is clicked.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 🛡️ Inclusive Policy Assistant & Auditor")

    with gr.Tab("Chat"):
        gr.ChatInterface(chatbot_response)

    with gr.Tab("Document Audit"):
        # The audit parses the upload as a PDF, so restrict the picker to
        # PDF files instead of letting other formats fail during parsing.
        file_input = gr.File(label="Upload Corporate Policy (PDF)", file_types=[".pdf"])
        audit_output = gr.Textbox(label="Audit Report", lines=10)
        audit_button = gr.Button("Run Audit")
        audit_button.click(audit_document, inputs=file_input, outputs=audit_output)

demo.launch(debug=True)
