In [None]:
# Part 3: Ethics & Optimization
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Section banner: the title line followed by a 50-character rule.
# sep="\n" yields the same two output lines as two separate print() calls.
print("🔍 ETHICAL AI ANALYSIS", "=" * 50, sep="\n")

🔍 ETHICAL AI ANALYSIS


In [None]:
# Narrative overview of where an MNIST-trained model may be biased.
# Nothing is measured or simulated in this cell: it only prints a fixed
# checklist (the notebook does not have access to raw MNIST metadata).
print("📊 MNIST MODEL BIAS ANALYSIS", "=" * 40, sep="\n")

mnist_bias_notes = """
Potential Biases in MNIST Model:

1. **Writing Style Bias:**
   - Most samples from North America/Europe
   - Underrepresents Asian/Arabic numeral styles
   - Cultural variations in handwriting not captured

2. **Demographic Bias:**
   - Collected from specific demographic groups (students, researchers)
   - Age, education level, and cultural background not diverse

3. **Data Collection Bias:**
   - Clean, centered images vs. real-world messy handwriting
   - Limited stroke width and pressure variations

4. **Performance Disparities:**
   - May perform poorly on digits with unusual strokes
   - Could struggle with slanted or rotated writing
"""
print(mnist_bias_notes)

# Suggested countermeasures. This is descriptive text as well; no fairness
# metrics are computed here (a real audit could use TensorFlow Fairness Indicators).
mnist_mitigation_notes = """
- **Data Augmentation:** Rotate, skew, add noise to training data
- **Diverse Data Collection:** Collect digits from global populations
- **Fairness Testing:** Use tools like TensorFlow Fairness Indicators
- **Regular Audits:** Monitor performance across different writing styles
- **Transfer Learning:** Fine-tune on diverse handwriting datasets
"""
print("\n🛡️ MITIGATION STRATEGIES for MNIST:")
print(mnist_mitigation_notes)

📊 MNIST MODEL BIAS ANALYSIS

Potential Biases in MNIST Model:

1. **Writing Style Bias:**
   - Most samples from North America/Europe
   - Underrepresents Asian/Arabic numeral styles
   - Cultural variations in handwriting not captured

2. **Demographic Bias:**
   - Collected from specific demographic groups (students, researchers)
   - Age, education level, and cultural background not diverse

3. **Data Collection Bias:**
   - Clean, centered images vs. real-world messy handwriting
   - Limited stroke width and pressure variations

4. **Performance Disparities:**
   - May perform poorly on digits with unusual strokes
   - Could struggle with slanted or rotated writing


🛡️ MITIGATION STRATEGIES for MNIST:

- **Data Augmentation:** Rotate, skew, add noise to training data
- **Diverse Data Collection:** Collect digits from global populations  
- **Fairness Testing:** Use tools like TensorFlow Fairness Indicators
- **Regular Audits:** Monitor performance across different writing styles
-

In [None]:
# Narrative overview of bias sources in the Amazon-review sentiment analysis.
# The cell prints fixed text only; it does not inspect any review data.
print("\n😊 AMAZON REVIEWS SENTIMENT ANALYSIS BIAS", "=" * 50, sep="\n")

review_bias_notes = """
Potential Biases in Review Analysis:

1. **Language & Cultural Bias:**
   - Only analyzed English reviews
   - Cultural expressions of satisfaction vary globally
   - Sarcasm and irony not properly handled

2. **Product Category Bias:**
   - Electronics-heavy sample data
   - Underrepresents books, clothing, home goods
   - Price point bias (mostly premium products)

3. **Demographic Bias:**
   - Reviews from tech-savvy users overrepresented
   - Age, income, and geographic biases in reviewers

4. **Rule-Based Limitations:**
   - Simple keyword matching misses context
   - "Expensive" can be negative (complaint) or positive (luxury)
   - No understanding of comparative statements
"""
print(review_bias_notes)

# Countermeasures for the review pipeline, printed as a bullet list.
review_mitigation_notes = """
- **Multilingual Models:** Use spaCy models for different languages
- **Context-Aware Analysis:** Implement transformer models for better understanding
- **Diverse Training Data:** Include reviews from various product categories
- **Bias Detection Tools:** Use spaCy's rule-based systems to flag potential biases
- **Human-in-the-Loop:** Manual review of edge cases and ambiguous sentiments
- **Regular Model Updates:** Continuously retrain on new, diverse data
"""
print("\n🛡️ MITIGATION STRATEGIES for Reviews:")
print(review_mitigation_notes)


😊 AMAZON REVIEWS SENTIMENT ANALYSIS BIAS

Potential Biases in Review Analysis:

1. **Language & Cultural Bias:**
   - Only analyzed English reviews
   - Cultural expressions of satisfaction vary globally
   - Sarcasm and irony not properly handled

2. **Product Category Bias:**
   - Electronics-heavy sample data
   - Underrepresents books, clothing, home goods
   - Price point bias (mostly premium products)

3. **Demographic Bias:**
   - Reviews from tech-savvy users overrepresented
   - Age, income, and geographic biases in reviewers

4. **Rule-Based Limitations:**
   - Simple keyword matching misses context
   - "Expensive" can be negative (complaint) or positive (luxury)
   - No understanding of comparative statements


🛡️ MITIGATION STRATEGIES for Reviews:

- **Multilingual Models:** Use spaCy models for different languages
- **Context-Aware Analysis:** Implement transformer models for better understanding
- **Diverse Training Data:** Include reviews from various product categorie

In [None]:
# Demonstrate spaCy rule-based bias mitigation
#
# An EntityRuler is loaded with a few two-token phrases that often introduce
# stereotyped statements; any match is reported as a potential bias.
print("\n🔧 SPACY RULE-BASED BIAS MITIGATION EXAMPLE")
print("=" * 50)

import spacy

# Load spaCy model (reloaded on every run, so adding the ruler below is
# safe to re-execute)
nlp = spacy.load("en_core_web_sm")

# Create custom rules for bias detection.
# "LOWER" compares the lower-cased token text exactly, so a pattern written
# for "nurse" does not match "nurses". The nouns therefore use the IN operator
# to accept both singular and plural forms.
bias_patterns = [
    {"label": "GENDER_BIAS", "pattern": [{"LOWER": "female"}, {"LOWER": {"IN": ["driver", "drivers"]}}]},
    {"label": "GENDER_BIAS", "pattern": [{"LOWER": "male"}, {"LOWER": {"IN": ["nurse", "nurses"]}}]},
    {"label": "AGE_BIAS", "pattern": [{"LOWER": "old"}, {"LOWER": "technology"}]},
    {"label": "CULTURAL_BIAS", "pattern": [{"LOWER": "foreign"}, {"LOWER": {"IN": ["product", "products"]}}]}
]

# Labels to report, derived from the patterns so the two cannot drift apart.
bias_labels = {pattern["label"] for pattern in bias_patterns}

# Add patterns to entity ruler; it is placed before the statistical NER
# component in the pipeline.
ruler = nlp.add_pipe("entity_ruler", before="ner")
ruler.add_patterns(bias_patterns)

# Test bias detection
test_texts = [
    "Female drivers are less skilled according to this review",
    "This old technology is unreliable",
    "Foreign products have poor quality control",
    "Male nurses are unusual in this hospital"
]

print("Testing bias detection in sample texts:")
for text in test_texts:
    doc = nlp(text)
    print(f"\n📝 Text: {text}")
    # Keep only entities produced by the bias rules, not ordinary NER labels.
    biases = [ent for ent in doc.ents if ent.label_ in bias_labels]
    if biases:
        for bias in biases:
            print(f"   ⚠️  POTENTIAL BIAS DETECTED: {bias.text} → {bias.label_}")
    else:
        print("   ✅ No biases detected")


🔧 SPACY RULE-BASED BIAS MITIGATION EXAMPLE
Testing bias detection in sample texts:

📝 Text: Female drivers are less skilled according to this review
   ⚠️  POTENTIAL BIAS DETECTED: Female drivers → GENDER_BIAS

📝 Text: This old technology is unreliable
   ⚠️  POTENTIAL BIAS DETECTED: old technology → AGE_BIAS

📝 Text: Foreign products have poor quality control
   ⚠️  POTENTIAL BIAS DETECTED: Foreign products → CULTURAL_BIAS

📝 Text: Male nurses are unusual in this hospital
   ✅ No biases detected


In [None]:
# Troubleshooting write-up: shows the supplied (intentionally broken) Keras
# snippet next to the corrected one. Both snippets are printed as text only;
# no model is built in this cell.
print("\n🐛 TROUBLESHOOTING CHALLENGE: DEBUGGING CODE", "=" * 50, sep="\n")

# The snippet as provided, with its two intentional errors.
buggy_snippet = """
ORIGINAL BUGGY CODE:

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28, 28)),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
"""
print(buggy_snippet)

print("\n🔧 DEBUGGED AND FIXED CODE:")

import tensorflow as tf

# The corrected snippet, annotated with the two fixes.
fixed_snippet = """
# FIX 1: Input shape should include channel dimension
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28, 28, 1)),  # Added channel
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# FIX 2: Use correct loss function for multi-class classification
model.compile(optimizer='adam',
              loss='categorical_crossentropy',  # Changed from binary_crossentropy
              metrics=['accuracy'])
"""
print(fixed_snippet)

# Summary: the two problems found, then the two corresponding fixes.
print("\n🐛 BUGS IDENTIFIED AND FIXED:")
for finding in (
    "1. ❌ Dimension mismatch: input_shape missing channel dimension",
    "2. ❌ Incorrect loss function: binary_crossentropy for multi-class problem",
    "3. ✅ Fixed: Added channel to input_shape",
    "4. ✅ Fixed: Changed to categorical_crossentropy",
):
    print(finding)


🐛 TROUBLESHOOTING CHALLENGE: DEBUGGING CODE

ORIGINAL BUGGY CODE:

model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28, 28)),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


🔧 DEBUGGED AND FIXED CODE:

# FIX 1: Input shape should include channel dimension
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, (3,3), activation='relu', input_shape=(28, 28, 1)),  # Added channel
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

# FIX 2: Use correct loss function for multi-class classification
model.compile(optimizer='adam', 
              loss='categorical_crossentropy',  # Changed from binary

In [None]:
# Let's actually run the debugging exercise
#
# Builds the corrected CNN, trains it briefly on a small MNIST subset and
# evaluates it, to show that the two fixes (channel dimension + categorical
# cross-entropy) produce a model that trains.
print("\n🛠️ PRACTICAL DEBUGGING EXERCISE")
print("=" * 40)

import numpy as np
# Imported here as well so the cell runs on its own instead of depending on
# an import made by a previously executed cell.
import tensorflow as tf

SEED = 42               # fixed seed so the quick check is repeatable
N_TRAIN_SAMPLES = 1000  # small training subset keeps the verification fast
N_TEST_SAMPLES = 100    # small evaluation subset, for the same reason

# Seeds the Python, NumPy and TensorFlow random generators in one call, so
# weight initialisation and batch shuffling are the same on every run
# (GPU kernels may still introduce small numerical differences).
tf.keras.utils.set_random_seed(SEED)

# Load the MNIST digits (downloaded on first use)
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Preprocess data correctly: add the trailing channel axis expected by Conv2D,
# scale pixels to [0, 1], and one-hot encode labels for categorical_crossentropy
X_train = X_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
X_test = X_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0
y_train_categorical = tf.keras.utils.to_categorical(y_train, 10)
y_test_categorical = tf.keras.utils.to_categorical(y_test, 10)

# Build corrected model. The input shape is declared with an explicit Input
# layer; passing input_shape= to the first layer of a Sequential model emits
# a UserWarning in current Keras versions.
corrected_model = tf.keras.Sequential([
    tf.keras.Input(shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(32, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D((2,2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(10, activation='softmax')
])

corrected_model.compile(optimizer='adam',
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])

print("✅ CORRECTED MODEL SUMMARY:")
corrected_model.summary()

# Quick test to verify it works (20% of the subset is held out for validation)
print("\n🧪 QUICK VERIFICATION (2 epochs):")
history = corrected_model.fit(X_train[:N_TRAIN_SAMPLES], y_train_categorical[:N_TRAIN_SAMPLES],
                             epochs=2, batch_size=32, verbose=1,
                             validation_split=0.2)

test_loss, test_accuracy = corrected_model.evaluate(X_test[:N_TEST_SAMPLES],
                                                    y_test_categorical[:N_TEST_SAMPLES],
                                                    verbose=0)
print(f"✅ Debugging successful! Test accuracy: {test_accuracy:.4f}")


🛠️ PRACTICAL DEBUGGING EXERCISE
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


✅ CORRECTED MODEL SUMMARY:



🧪 QUICK VERIFICATION (2 epochs):
Epoch 1/2
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 73ms/step - accuracy: 0.5357 - loss: 1.7287 - val_accuracy: 0.8150 - val_loss: 0.6462
Epoch 2/2
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8603 - loss: 0.4552 - val_accuracy: 0.8400 - val_loss: 0.6121
✅ Debugging successful! Test accuracy: 0.8600


In [None]:
# Closing reflection for the ethics section: a fixed summary of the bias,
# mitigation, monitoring and impact points raised earlier in the notebook.
ethical_insights = """
KEY ETHICAL INSIGHTS FROM OUR AI DEVELOPMENT:

🔍 **Bias Awareness:**
   - Even "simple" datasets like MNIST contain hidden biases
   - Cultural, demographic, and collection biases are pervasive
   - Rule-based systems can amplify existing stereotypes

🛡️ **Mitigation Strategies Implemented:**
   - Data augmentation for diversity
   - Multiple model validation techniques
   - Rule-based bias detection with spaCy
   - Proper loss functions and architecture choices

🚨 **Continuous Monitoring Needed:**
   - Regular fairness audits
   - Diverse test case development
   - Stakeholder feedback incorporation
   - Transparency in model limitations

🌍 **Real-World Impact Considerations:**
   - MNIST: Handwriting recognition systems used in postal, banking, education
   - Sentiment Analysis: Influences product recommendations and business decisions
   - Responsibility: Our models could affect real people and businesses

✅ **Best Practices Demonstrated:**
   - Proper error handling and debugging
   - Documentation of limitations
   - Consideration of edge cases
   - Ethical analysis alongside technical implementation
"""

# Title, rule and body in one call; sep="\n" gives the same line breaks as
# three consecutive print() calls.
print("\n📝 ETHICAL REFLECTION SUMMARY", "=" * 50, ethical_insights, sep="\n")


📝 ETHICAL REFLECTION SUMMARY

KEY ETHICAL INSIGHTS FROM OUR AI DEVELOPMENT:

🔍 **Bias Awareness:**
   - Even "simple" datasets like MNIST contain hidden biases
   - Cultural, demographic, and collection biases are pervasive
   - Rule-based systems can amplify existing stereotypes

🛡️ **Mitigation Strategies Implemented:**
   - Data augmentation for diversity
   - Multiple model validation techniques  
   - Rule-based bias detection with spaCy
   - Proper loss functions and architecture choices

🚨 **Continuous Monitoring Needed:**
   - Regular fairness audits
   - Diverse test case development
   - Stakeholder feedback incorporation
   - Transparency in model limitations

🌍 **Real-World Impact Considerations:**
   - MNIST: Handwriting recognition systems used in postal, banking, education
   - Sentiment Analysis: Influences product recommendations and business decisions
   - Responsibility: Our models could affect real people and businesses

✅ **Best Practices Demonstrated:**
   - Prop

In [None]:
# Closing checklist of performance and optimization practices; fixed text only.
optimization_tips = """
PERFORMANCE & OPTIMIZATION TIPS DEMONSTRATED:

🎯 **Model Architecture:**
   - Appropriate layer dimensions and types
   - Correct loss functions for problem type
   - Proper input shape handling

🚀 **Training Efficiency:**
   - GPU utilization (T4 acceleration)
   - Batch size optimization
   - Early stopping and callbacks

🔧 **Code Quality:**
   - Debugging and error resolution
   - Proper data preprocessing
   - Efficient memory usage

📊 **Evaluation Rigor:**
   - Multiple metric tracking
   - Validation set usage
   - Confusion matrix analysis
   - Cross-validation where appropriate

🔄 **Iterative Improvement:**
   - Start simple, then complexify
   - Regular testing and validation
   - Performance benchmarking
"""

# Title, rule and body in one call; sep="\n" gives the same line breaks as
# three consecutive print() calls.
print("\n⚡ OPTIMIZATION BEST PRACTICES", "=" * 45, optimization_tips, sep="\n")


⚡ OPTIMIZATION BEST PRACTICES

PERFORMANCE & OPTIMIZATION TIPS DEMONSTRATED:

🎯 **Model Architecture:**
   - Appropriate layer dimensions and types
   - Correct loss functions for problem type
   - Proper input shape handling

🚀 **Training Efficiency:**
   - GPU utilization (T4 acceleration)
   - Batch size optimization
   - Early stopping and callbacks

🔧 **Code Quality:**
   - Debugging and error resolution
   - Proper data preprocessing
   - Efficient memory usage

📊 **Evaluation Rigor:**
   - Multiple metric tracking
   - Validation set usage
   - Confusion matrix analysis
   - Cross-validation where appropriate

🔄 **Iterative Improvement:**
   - Start simple, then complexify
   - Regular testing and validation
   - Performance benchmarking

