# 1. Environment Setup

In [10]:
# Install required packages
!pip install transformers spacy gradio
!python -m spacy download en_core_web_md

# Import libraries
import torch
import spacy
import re
import json
import random
import pandas as pd
import numpy as np
from transformers import pipeline, AutoTokenizer
from google.colab import files
import matplotlib.pyplot as plt
import seaborn as sns

Collecting en-core-web-md==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl (33.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.5/33.5 MB[0m [31m71.8 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [11]:
# Report the PyTorch build and choose the compute device for this session.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("Using CPU")

PyTorch version: 2.6.0+cu124
CUDA available: True
Using GPU: NVIDIA L4


# 2. Text Processing Pipeline

In [13]:
class TextProcessor:
    """Clean raw review text and derive sentence- and token-level views of it.

    Args:
        spacy_model: Name of the spaCy pipeline handed to ``spacy.load``.
        tokenizer_name: Hugging Face identifier handed to
            ``AutoTokenizer.from_pretrained``.
    """

    def __init__(self, spacy_model="en_core_web_md",
                 tokenizer_name="roberta-large-mnli"):
        self.nlp = spacy.load(spacy_model)
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    def preprocess(self, text):
        """Normalize ``text`` and run it through spaCy and the tokenizer.

        Args:
            text: Review text. ``None`` is treated as an empty string.

        Returns:
            dict with keys:
                ``raw_text``  - the text with every whitespace run collapsed
                                to a single space and the ends trimmed;
                ``sentences`` - stripped, non-empty sentence strings taken
                                from ``doc.sents``;
                ``tokens``    - whatever the tokenizer returns for the
                                normalized text (called with
                                ``return_tensors="pt"``);
                ``doc``       - the object returned by ``self.nlp``.
        """
        # Treat a missing review as empty instead of failing inside re.sub.
        if text is None:
            text = ""

        # Collapse newlines/tabs/multiple spaces so sentence splitting and
        # pattern matching downstream see one canonical form.
        text = re.sub(r'\s+', ' ', text).strip()

        doc = self.nlp(text)

        # Drop sentences that are empty after stripping so callers never
        # have to special-case blank entries.
        sentences = [
            stripped
            for stripped in (sent.text.strip() for sent in doc.sents)
            if stripped
        ]

        tokens = self.tokenizer(text, return_tensors="pt",
                                padding=True, truncation=True)

        return {
            "raw_text": text,
            "sentences": sentences,
            "tokens": tokens,
            "doc": doc
        }

# 3. Zero-Shot Classification System

In [14]:
class ZeroShotAnalyzer:
    """Zero-shot classifier wrapper exposing the three label sets used here.

    Every public method runs the same Hugging Face
    ``zero-shot-classification`` pipeline with ``multi_label=True`` and
    returns the pipeline's result unchanged; only the candidate labels differ.

    Args:
        model_name: Model identifier passed to ``pipeline``.
        device: Device passed to ``pipeline``. ``None`` (the default) selects
            GPU 0 when CUDA is available and the CPU (``-1``) otherwise.
    """

    FEEDBACK_TYPE_LABELS = ("positive feedback", "constructive criticism",
                            "actionable feedback", "vague feedback")
    BIAS_LABELS = ("gender-biased language", "racially-biased language",
                   "personality-biased language", "neutral language")
    SPECIFICITY_LABELS = ("specific feedback", "general feedback",
                          "measurable feedback", "subjective feedback")

    def __init__(self, model_name="facebook/bart-large-mnli", device=None):
        if device is None:
            # Use GPU if available
            device = 0 if torch.cuda.is_available() else -1
        self.classifier = pipeline("zero-shot-classification",
                                   model=model_name,
                                   device=device)

    def _classify(self, text, labels):
        """Run the pipeline on ``text`` against ``labels`` (multi-label)."""
        # Pass a fresh list so the class-level label tuples are never shared
        # with (or mutated by) the pipeline.
        return self.classifier(text, list(labels), multi_label=True)

    def classify_feedback_type(self, text):
        """Score ``text`` against the feedback-type labels."""
        return self._classify(text, self.FEEDBACK_TYPE_LABELS)

    def detect_bias(self, text):
        """Score ``text`` against the bias-category labels."""
        return self._classify(text, self.BIAS_LABELS)

    def analyze_specificity(self, text):
        """Score ``text`` against the specificity labels."""
        return self._classify(text, self.SPECIFICITY_LABELS)

# 4. Bias Detection Engine

In [15]:
class BiasDetector:
    """Flag potentially biased wording using zero-shot scores plus lexicons.

    Args:
        zero_shot_analyzer: Object exposing ``detect_bias(text)``; its result
            is returned untouched under the ``"zero_shot"`` key.
    """

    def __init__(self, zero_shot_analyzer):
        self.analyzer = zero_shot_analyzer
        # Create basic lexicons in-memory instead of loading from files
        self.gender_terms = self._create_gender_lexicon()
        self.cultural_terms = self._create_cultural_lexicon()

    def _create_gender_lexicon(self):
        """Return the in-memory gender lexicon (category -> list of terms)."""
        return {
            "masculine_coded": ["aggressive", "ambitious", "analytical", "assertive", "confident", "dominant", "forceful", "independent", "logical"],
            "feminine_coded": ["collaborative", "compassionate", "emotional", "empathetic", "nurturing", "sensitive", "supportive", "warm"],
            "gendered_terms": ["he", "she", "him", "her", "his", "hers", "himself", "herself"]
        }

    def _create_cultural_lexicon(self):
        """Return the in-memory cultural lexicon (category -> list of terms)."""
        return {
            "cultural_stereotypes": ["articulate", "well-spoken", "exotic", "diverse fit", "cultural fit"]
        }

    def _check_lexicon(self, sentence, lexicon_dict):
        """Return the lexicon terms that occur in ``sentence`` as whole words.

        Matching is case-insensitive. A term counts only when it is not
        embedded in a longer word, so "he" does not match inside "the", but
        it does match when followed by punctuation ("emotional." /
        "aggressive,"), which a plain space-delimited check misses.

        Args:
            sentence: Sentence text to scan.
            lexicon_dict: Mapping of category name to a list of terms.

        Returns:
            List of matched terms in lexicon order, without duplicates.
        """
        sentence = sentence.lower()
        found_terms = []

        for terms in lexicon_dict.values():
            for term in terms:
                if term in found_terms:
                    # The same term listed under two categories is reported
                    # once, so it cannot inflate the marker count.
                    continue
                # Lookarounds instead of \b so multi-word and hyphenated
                # terms ("well-spoken", "cultural fit") are handled uniformly.
                pattern = r"(?<!\w)" + re.escape(term.lower()) + r"(?!\w)"
                if re.search(pattern, sentence):
                    found_terms.append(term)

        return found_terms

    def _combine_results(self, zs_results, lexicon_results):
        """Bundle both result sets; no score fusion is attempted."""
        return {
            "zero_shot": zs_results,
            "lexicon_based": lexicon_results
        }

    def detect_bias(self, processed_text):
        """Run zero-shot and lexicon-based bias detection.

        Args:
            processed_text: dict with ``"raw_text"`` (str) and
                ``"sentences"`` (iterable of str).

        Returns:
            dict with ``"zero_shot"`` (the analyzer's raw result) and
            ``"lexicon_based"`` (list of dicts with keys ``sentence``,
            ``bias_type``, ``markers`` and ``confidence``). Confidence is a
            fixed heuristic: 0.8 when more than one marker is found in the
            sentence, 0.6 otherwise.
        """
        zs_results = self.analyzer.detect_bias(processed_text["raw_text"])

        # Order matters: for each sentence gender findings are reported
        # before cultural ones.
        lexicons = (
            ("gender", self.gender_terms),
            ("cultural", self.cultural_terms),
        )

        results = []
        for sentence in processed_text["sentences"]:
            for bias_type, lexicon in lexicons:
                markers = self._check_lexicon(sentence, lexicon)
                if markers:
                    results.append({
                        "sentence": sentence,
                        "bias_type": bias_type,
                        "markers": markers,
                        "confidence": 0.8 if len(markers) > 1 else 0.6
                    })

        return self._combine_results(zs_results, results)

# 5. Specificity Analyzer

In [16]:
class SpecificityAnalyzer:
    """Detect vague phrasing and the presence of measurable outcomes.

    Args:
        zero_shot_analyzer: Object exposing ``analyze_specificity(text)``;
            its result is returned untouched as ``"zero_shot_results"``.
    """

    # Signals that a sentence quotes a measurable outcome. Compiled
    # case-insensitively so "Increased by ..." at the start of a sentence
    # counts, matching how the vague-language patterns are applied.
    METRIC_PATTERN = re.compile(r'\d+%|\d+ percent|increased by', re.IGNORECASE)

    def __init__(self, zero_shot_analyzer):
        self.analyzer = zero_shot_analyzer
        # Patterns for vague language
        self.vague_patterns = [
            r"good job",
            r"needs improvement",
            r"work harder",
            r"be more proactive",
            r"communication skills",
            r"team player",
            r"meets expectations"
        ]

    def analyze_specificity(self, processed_text):
        """Analyze how specific the review is.

        Args:
            processed_text: dict with ``"raw_text"`` (str) and
                ``"sentences"`` (sequence of str).

        Returns:
            dict with keys:
                ``sentence_issues`` - one entry per (sentence, vague pattern)
                    match, holding ``sentence_id`` (index into
                    ``sentences``), ``sentence``, ``issue`` and the matching
                    ``pattern`` string exactly as listed in
                    ``self.vague_patterns``;
                ``has_measurable_outcomes`` - ``True`` if any sentence
                    matches ``METRIC_PATTERN``;
                ``zero_shot_results`` - the analyzer's raw result.
        """
        sentences = processed_text["sentences"]

        zs_results = self.analyzer.analyze_specificity(processed_text["raw_text"])

        sentence_issues = [
            {
                "sentence_id": index,
                "sentence": sentence,
                "issue": "vague language",
                "pattern": pattern
            }
            for index, sentence in enumerate(sentences)
            for pattern in self.vague_patterns
            if re.search(pattern, sentence, re.IGNORECASE)
        ]

        has_metrics = any(self.METRIC_PATTERN.search(s) for s in sentences)

        return {
            "sentence_issues": sentence_issues,
            "has_measurable_outcomes": has_metrics,
            "zero_shot_results": zs_results
        }

# 6. Recommendation Engine

In [17]:
class RecommendationEngine:
    """Turn detected bias / vagueness issues into human-readable suggestions.

    Suggestions are assembled from two tables built in ``__init__``:
    ``templates`` (sentence frames keyed by issue kind) and ``alternatives``
    (replacement phrasings keyed by the offending term or pattern). Choices
    among several candidates are made with ``random.choice``.
    """

    def __init__(self):
        # Sentence frames; each may reference {original} and/or {suggestion}.
        vague_frames = [
            "Consider replacing '{original}' with specific examples: '{suggestion}'",
            "Make this more actionable by adding metrics: '{suggestion}'"
        ]
        bias_frames = [
            "This phrase could show bias: '{original}'. Consider: '{suggestion}'",
            "For more inclusive language, try: '{suggestion}' instead of '{original}'"
        ]
        self.templates = {"vague_feedback": vague_frames, "bias": bias_frames}

        # Replacement phrasings, keyed by lower-cased marker / vague pattern.
        self.alternatives = {
            "good job": [
                "completed {project} ahead of schedule, resulting in {outcome}",
                "exceeded the target of {metric} by {amount}"
            ],
            "needs improvement": [
                "could increase {metric} by focusing on {specific_area}",
                "would benefit from developing skills in {skill_area}"
            ],
            "work harder": [
                "could dedicate more time to {specific_task}",
                "might prioritize {important_aspect} to improve outcomes"
            ],
            "be more proactive": [
                "could anticipate {specific_need} before being asked",
                "might identify opportunities to improve {process} independently"
            ],
            "aggressive": [
                "presents ideas with conviction",
                "advocates strongly for their position",
                "communicates directly and clearly"
            ],
            "emotional": [
                "shows passion for the work",
                "demonstrates strong engagement",
                "cares deeply about team outcomes"
            ]
        }

    def _generate_bias_alternative(self, bias_issue):
        """Pick a replacement phrasing for the first marker that has one.

        Falls back to a generic hint (gender-specific when ``bias_type`` is
        ``"gender"``) when none of the markers has a listed alternative.
        """
        lowered_markers = (marker.lower() for marker in bias_issue["markers"])
        known_marker = next(
            (key for key in lowered_markers if key in self.alternatives),
            None,
        )
        if known_marker is not None:
            return random.choice(self.alternatives[known_marker])

        if bias_issue["bias_type"] == "gender":
            return "using more gender-neutral language"
        return "focusing on specific behaviors rather than personal attributes"

    def _generate_specificity_alternative(self, specificity_issue):
        """Pick a concrete rewording for a vague pattern, placeholders filled.

        Patterns without a listed alternative get a generic hint instead.
        """
        options = self.alternatives.get(specificity_issue["pattern"].lower())
        if options is None:
            return "providing specific examples with measurable outcomes"

        filled = random.choice(options)

        # A real system would derive these from the review; the prototype
        # substitutes fixed, generic examples for every placeholder.
        generic_fill_ins = {
            "{project}": "the database migration",
            "{outcome}": "a 15% performance improvement",
            "{metric}": "customer satisfaction",
            "{amount}": "10%",
            "{specific_area}": "email response time",
            "{skill_area}": "technical documentation",
            "{specific_task}": "planning stages",
            "{important_aspect}": "quality assurance",
            "{specific_need}": "client requirements",
            "{process}": "team workflows",
        }
        for placeholder, example in generic_fill_ins.items():
            filled = filled.replace(placeholder, example)

        return filled

    def generate_recommendations(self, analysis_results):
        """Build one recommendation per lexicon bias hit and per vague sentence.

        Args:
            analysis_results: dict that may contain ``"bias_issues"`` (with a
                ``"lexicon_based"`` list) and ``"specificity_issues"`` (with a
                ``"sentence_issues"`` list).

        Returns:
            List of dicts with keys ``type``, ``original`` (the sentence) and
            ``suggestion`` (the formatted advice). Bias recommendations come
            first, then specificity ones.
        """
        recommendations = []

        bias_section = analysis_results.get("bias_issues", {})
        if "lexicon_based" in bias_section:
            for hit in bias_section["lexicon_based"]:
                # Frame first, then phrasing: keeps the order of random draws.
                frame = random.choice(self.templates["bias"])
                phrasing = self._generate_bias_alternative(hit)
                quoted = hit["markers"][0] if hit["markers"] else "this phrase"
                recommendations.append({
                    "type": "bias",
                    "original": hit["sentence"],
                    "suggestion": frame.format(original=quoted,
                                               suggestion=phrasing)
                })

        specificity_section = analysis_results.get("specificity_issues", {})
        if "sentence_issues" in specificity_section:
            for vague in specificity_section["sentence_issues"]:
                frame = random.choice(self.templates["vague_feedback"])
                phrasing = self._generate_specificity_alternative(vague)
                recommendations.append({
                    "type": "specificity",
                    "original": vague["sentence"],
                    "suggestion": frame.format(original=vague["pattern"],
                                               suggestion=phrasing)
                })

        return recommendations

# 7. Integrated Analysis Pipeline

In [18]:
class PerformanceReviewAnalyzer:
    """End-to-end pipeline: preprocess, analyze, align, recommend.

    Wires together the text processor, the zero-shot analyzer, both
    rule-based detectors and the recommendation engine.
    """

    # Words shorter than this are ignored when comparing objectives with the
    # review, which filters out most stop words ("the", "and", "by", ...).
    MIN_KEYWORD_LENGTH = 4

    def __init__(self):
        self.text_processor = TextProcessor()
        self.zero_shot = ZeroShotAnalyzer()
        self.bias_detector = BiasDetector(self.zero_shot)
        self.specificity_analyzer = SpecificityAnalyzer(self.zero_shot)
        self.recommender = RecommendationEngine()

    @classmethod
    def _keywords(cls, text):
        """Return the lower-cased words of ``text`` that are long enough."""
        return {
            word.lower()
            for word in re.findall(r'\w+', text)
            if len(word) >= cls.MIN_KEYWORD_LENGTH
        }

    def _check_objective_alignment(self, processed_text, objectives):
        """Check, by keyword overlap, whether each objective is mentioned.

        Args:
            processed_text: dict with a ``"raw_text"`` string.
            objectives: iterable of objective strings; blank entries are
                skipped and surrounding whitespace is removed.

        Returns:
            List of dicts with ``objective`` (stripped text), ``mentioned``
            (at least one shared keyword) and ``overlap_score`` (shared
            keywords divided by the objective's keywords, 0 if it has none).
        """
        # The review's keywords do not depend on the objective, so compute
        # them once instead of once per objective.
        text_words = self._keywords(processed_text["raw_text"])

        alignment = []
        for objective in objectives:
            objective = objective.strip()
            if not objective:  # Skip empty objectives
                continue

            objective_words = self._keywords(objective)
            common_words = objective_words & text_words

            alignment.append({
                "objective": objective,
                "mentioned": len(common_words) > 0,
                "overlap_score": len(common_words) / len(objective_words) if objective_words else 0
            })

        return alignment

    def analyze(self, review_text, objectives=None):
        """Run the full analysis on one review.

        Args:
            review_text: The review. ``None``, empty or whitespace-only
                input yields ``{"error": "No review text provided"}``.
            objectives: Optional objectives, either one string with one
                objective per line or an iterable of strings.

        Returns:
            ``{"analysis": {...}, "recommendations": [...]}`` where
            ``analysis`` holds ``bias_issues``, ``specificity_issues``,
            ``feedback_type`` and ``objective_alignment`` (``None`` when no
            objectives were given), or the error dict described above.
        """
        if not review_text or not review_text.strip():
            return {
                "error": "No review text provided"
            }

        processed = self.text_processor.preprocess(review_text)

        bias_results = self.bias_detector.detect_bias(processed)
        specificity_results = self.specificity_analyzer.analyze_specificity(processed)
        # Classify the same normalized text the other engines see, so all
        # zero-shot scores refer to one input.
        feedback_type = self.zero_shot.classify_feedback_type(processed["raw_text"])

        objective_alignment = None
        if objectives:
            if isinstance(objectives, str):
                # splitlines() also handles "\r\n" pasted from text boxes.
                objective_list = objectives.splitlines()
            else:
                objective_list = list(objectives)
            objective_alignment = self._check_objective_alignment(processed, objective_list)

        analysis_results = {
            "bias_issues": bias_results,
            "specificity_issues": specificity_results,
            "feedback_type": feedback_type,
            "objective_alignment": objective_alignment
        }

        recommendations = self.recommender.generate_recommendations(analysis_results)

        return {
            "analysis": analysis_results,
            "recommendations": recommendations
        }

# 8. Visualization Functions

In [19]:
def visualize_feedback_types(results):
    """Plot the zero-shot feedback-type scores as a labelled bar chart.

    Reads ``results["analysis"]["feedback_type"]`` and uses its ``labels``
    and ``scores`` lists as bar names and heights.
    """
    feedback_result = results["analysis"]["feedback_type"]
    type_names = feedback_result["labels"]
    confidences = feedback_result["scores"]

    plt.figure(figsize=(10, 6))
    palette = sns.color_palette("viridis", len(type_names))
    bar_container = plt.bar(type_names, confidences, color=palette)

    # Faint dashed guide at 0.5 as a visual reference.
    plt.axhline(y=0.5, color='r', linestyle='--', alpha=0.3)

    plt.title('Feedback Type Distribution', fontsize=15)
    plt.ylabel('Confidence Score', fontsize=12)
    plt.ylim(0, 1)
    plt.xticks(rotation=30, ha='right')

    # Print each score just above its bar, centred horizontally.
    for rect in bar_container:
        bar_top = rect.get_height()
        bar_centre = rect.get_x() + rect.get_width()/2.
        plt.text(bar_centre, bar_top + 0.02,
                 f'{bar_top:.2f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

def visualize_objective_alignment(results):
    """Plot per-objective overlap scores as a horizontal bar chart.

    Bars are green for objectives flagged as mentioned and red otherwise.
    Prints a notice and returns without plotting when the results carry no
    objective alignment data.
    """
    alignment = results["analysis"].get("objective_alignment")
    if not alignment:
        print("No objectives provided for alignment visualization.")
        return

    # Keep axis labels short: anything of 30+ characters is cut to 27 + "...".
    axis_labels = []
    for entry in alignment:
        label = entry["objective"]
        if len(label) >= 30:
            label = label[:27] + "..."
        axis_labels.append(label)

    scores = [entry["overlap_score"] for entry in alignment]
    bar_colors = ['green' if entry["mentioned"] else 'red' for entry in alignment]

    plt.figure(figsize=(10, 6))
    bar_container = plt.barh(axis_labels, scores, color=bar_colors)

    plt.title('Objective Alignment', fontsize=15)
    plt.xlabel('Overlap Score', fontsize=12)
    plt.xlim(0, 1)

    # Write each score just past the end of its bar, centred vertically.
    for rect in bar_container:
        bar_length = rect.get_width()
        bar_middle = rect.get_y() + rect.get_height()/2.
        plt.text(bar_length + 0.02, bar_middle,
                 f'{bar_length:.2f}', va='center')

    plt.tight_layout()
    plt.show()

def display_analysis_summary(results):
    """Print a plain-text report of one analysis result to stdout.

    Sections, in order: top feedback type, lexicon-based bias hits, vague
    sentences, objective alignment (only when present) and recommendations.
    """
    divider = "=" * 60
    print(divider)
    print("PERFORMANCE REVIEW ANALYSIS SUMMARY")
    print(divider)

    analysis = results["analysis"]

    # Feedback type: the first label/score pair is reported as predominant.
    print("\nPREDOMINANT FEEDBACK TYPE:")
    feedback_result = analysis["feedback_type"]
    leading_label = feedback_result["labels"][0]
    leading_score = feedback_result["scores"][0]
    print(f"- {leading_label.title()} ({leading_score:.2f} confidence)")

    # Bias: only the lexicon-based findings are listed.
    print("\nPOTENTIAL BIAS DETECTED:")
    bias_section = analysis["bias_issues"]
    if "lexicon_based" in bias_section:
        lexicon_hits = bias_section["lexicon_based"]
        if not lexicon_hits:
            print("- No significant bias detected")
        else:
            for hit in lexicon_hits:
                print(f"- {hit['bias_type'].title()} bias: \"{hit['sentence']}\"")
                print(f"  Concerning terms: {', '.join(hit['markers'])}")

    # Vague sentences.
    print("\nVAGUE LANGUAGE:")
    vague_hits = analysis["specificity_issues"]["sentence_issues"]
    if not vague_hits:
        print("- No vague language detected")
    else:
        for hit in vague_hits:
            print(f"- \"{hit['sentence']}\"")

    # Objective alignment is optional and skipped when absent or empty.
    alignment = analysis.get("objective_alignment")
    if alignment:
        print("\nOBJECTIVE ALIGNMENT:")
        for entry in alignment:
            if entry["mentioned"]:
                status = "✓ Mentioned"
            else:
                status = "✗ Not addressed"
            print(f"- {entry['objective']}: {status}")

    # Recommendations, numbered from 1.
    print("\nKEY RECOMMENDATIONS:")
    suggestions = results["recommendations"]
    if not suggestions:
        print("- No specific recommendations")
    else:
        for number, rec in enumerate(suggestions, 1):
            print(f"{number}. Original: \"{rec['original']}\"")
            print(f"   Suggestion: {rec['suggestion']}")

    print("\n" + divider)

# 9. Gradio Interface

In [None]:
import gradio as gr

# Create the single shared analyzer instance used by the Gradio callback and
# by the sample-review cell further down. Constructing it loads the spaCy
# model, the tokenizer and the zero-shot classification pipeline, so it is
# built once here rather than on every request.
analyzer = PerformanceReviewAnalyzer()

def analyze_review(objectives, review_text):
    """Gradio callback: analyze one review and return a Markdown report.

    Args:
        objectives: Text of the objectives box (one objective per line);
            may be empty.
        review_text: Text of the review box.

    Returns:
        A Markdown string. When the review is missing, empty or only
        whitespace, a short prompt asking for input is returned instead; if
        the analyzer reports an error, that error is returned as Markdown.
    """
    # Whitespace-only input must be rejected here as well: the analyzer
    # answers it with an {"error": ...} dict that has no "analysis" key, which
    # the report code below would trip over.
    if not review_text or not review_text.strip():
        return "Please enter some review text to analyze."

    results = analyzer.analyze(review_text, objectives)
    if "error" in results:
        return f"**Error:** {results['error']}"

    analysis = results["analysis"]

    # Charts and the plain-text summary are produced through matplotlib and
    # print(); only the Markdown string returned below is sent to Gradio.
    visualize_feedback_types(results)
    if analysis.get("objective_alignment"):
        visualize_objective_alignment(results)
    display_analysis_summary(results)

    # Assemble the Markdown report piece by piece and join once at the end.
    parts = ["## Analysis Results\n\n", "### Detected Issues\n\n"]

    # Bias issues (lexicon-based findings only)
    parts.append("#### Potential Bias\n")
    if "lexicon_based" in analysis["bias_issues"]:
        bias_issues = analysis["bias_issues"]["lexicon_based"]
        if bias_issues:
            for issue in bias_issues:
                parts.append(f"- **{issue['bias_type'].title()} bias:** \"{issue['sentence']}\"\n")
                parts.append(f"  - Concerning terms: {', '.join(issue['markers'])}\n")
        else:
            parts.append("- No significant bias detected\n")

    # Specificity issues
    parts.append("\n#### Vague Language\n")
    specificity_issues = analysis["specificity_issues"]["sentence_issues"]
    if specificity_issues:
        for issue in specificity_issues:
            parts.append(f"- \"{issue['sentence']}\"\n")
    else:
        parts.append("- No vague language detected\n")

    # Objective alignment (section omitted when no objectives were given)
    if analysis.get("objective_alignment"):
        parts.append("\n#### Objective Alignment\n")
        for obj in analysis["objective_alignment"]:
            status = "✓ Mentioned" if obj["mentioned"] else "✗ Not addressed"
            parts.append(f"- {obj['objective']}: {status}\n")

    # Recommendations
    parts.append("\n### Recommendations\n\n")
    if results["recommendations"]:
        for i, rec in enumerate(results["recommendations"], 1):
            parts.append(f"**Recommendation {i}:**\n")
            parts.append(f"- **Original:** \"{rec['original']}\"\n")
            parts.append(f"- **Suggestion:** {rec['suggestion']}\n\n")
    else:
        parts.append("- No specific recommendations\n")

    return "".join(parts)

# Create Gradio interface.
# The two input boxes are listed in the same order as the parameters of
# analyze_review(objectives, review_text); each entry in `examples` follows
# that order too: [objectives, review text].
demo = gr.Interface(
    fn=analyze_review,
    inputs=[
        gr.Textbox(lines=5, label="Performance Objectives (one per line)", placeholder="Enter objectives..."),
        gr.Textbox(lines=10, label="Performance Review Text", placeholder="Enter your review text...")
    ],
    # analyze_review returns a Markdown-formatted string.
    outputs=gr.Markdown(),
    title="Performance Review Analysis System",
    description="Analyze performance reviews for bias, specificity, and alignment with objectives.",
    examples=[
        ["Increase sales by 10% in Q3\nComplete project X by November 15th",
         "John has done a good job this year. He completed the database migration project and worked on the new API. He needs to improve his communication and be more proactive."],
        ["Improve team collaboration\nDeliver quarterly reports on time",
         "Sarah is an aggressive manager who pushes her team hard. She has good ideas but sometimes she is too emotional in meetings. Her team has delivered good results."]
    ]
)

# Launch the interface.
# NOTE: with debug=True the cell keeps running (Colab reports that it "will
# run indefinitely") so errors and logs stay visible; cells below it do not
# execute until this one is stopped. Set debug=False to avoid that.
demo.launch(debug=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/688 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cuda:0


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://2569bef625d9b66d98.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


PERFORMANCE REVIEW ANALYSIS SUMMARY

PREDOMINANT FEEDBACK TYPE:
- Positive Feedback (0.66 confidence)

POTENTIAL BIAS DETECTED:
- Gender bias: "He completed the database migration project ahead of schedule and worked on the new API integration."
  Concerning terms: he
- Gender bias: "He needs to improve his communication with the team and be more proactive in identifying potential issues before they become problems."
  Concerning terms: he, his

VAGUE LANGUAGE:
- "John has done a good job this year."
- "He needs to improve his communication with the team and be more proactive in identifying potential issues before they become problems."

OBJECTIVE ALIGNMENT:
- Growth 10%: ✗ Not addressed

KEY RECOMMENDATIONS:
1. Original: "He completed the database migration project ahead of schedule and worked on the new API integration."
   Suggestion: For more inclusive language, try: 'using more gender-neutral language' instead of 'he'
2. Original: "He needs to improve his communication with the te

# 10. Testing with Sample Reviews

In [None]:
# Test the analyzer with a sample review.
# This cell reuses the `analyzer` instance created in the Gradio cell, so
# that cell must have been executed first.
# The triple-quoted strings start and end with a newline and wrap across
# lines; the text processor collapses whitespace in the review, and blank
# objective lines are skipped during the alignment check.
test_review = """
John has done a good job this year. He completed the database migration
project ahead of schedule and worked on the new API integration.
He needs to improve his communication with the team and be more proactive
in identifying potential issues before they become problems.
"""

# One objective per line, the same format the Gradio objectives box uses.
test_objectives = """
Complete database migration by Q3
Improve API integration
Enhance team communication
"""

# Run analysis
test_results = analyzer.analyze(test_review, test_objectives)

# Display results (plain-text report on stdout)
display_analysis_summary(test_results)

# Visualize results (feedback-type bar chart, then objective-overlap chart)
visualize_feedback_types(test_results)
visualize_objective_alignment(test_results)