In [17]:
import random
import numpy as np
from collections import defaultdict


#### emerging direction in deep learning : LLms and generative model

In [18]:
# --- Large Language Models (LLMs) ---
class SimpleLLM:
    def __init__(self):
        self.name = "Simple LLM"
        # Simplified vocabulary
        self.vocab = ["the", "cat", "dog", "sits", "runs", "on", "mat", "park", "is", "happy"]
        self.context_window = 5  # How many tokens to consider

        # Simplified "knowledge" - word associations
        self.word_associations = {
            "cat": ["sits", "runs", "is", "happy"],
            "dog": ["runs", "sits", "is", "happy"],
            "sits": ["on", "the"],
            "runs": ["in", "to", "the"],
            "the": ["cat", "dog", "mat", "park"]
        }

        print(f"=== {self.name} INITIALIZED ===")
        print(f"Vocabulary size: {len(self.vocab)}")
        print(f"Context window: {self.context_window}")
        print()

    def tokenize(self, text):
        """Convert text to tokens (simplified)"""
        tokens = text.lower().split()
        print(f"Tokenization: '{text}' → {tokens}")
        return tokens

    def generate_next_token(self, context):
        """Generate next token based on context (simplified attention)"""
        print(f"Context: {context}")

        if not context:
            next_token = random.choice(self.vocab)
        else:
            last_word = context[-1]
            if last_word in self.word_associations:
                # Simplified "attention" - focus on last word
                possible_next = self.word_associations[last_word]
                next_token = random.choice(possible_next)
                print(f"Attention: '{last_word}' → possible next: {possible_next}")
            else:
                next_token = random.choice(self.vocab)

        print(f"Generated token: {next_token}")
        return next_token

    def generate_text(self, prompt, max_length=8):
        """Generate text continuation"""
        print(f"=== TEXT GENERATION ===")
        print(f"Prompt: '{prompt}'")

        tokens = self.tokenize(prompt)
        generated = tokens.copy()

        for i in range(max_length - len(tokens)):
            # Use only recent context (context window)
            context = generated[-self.context_window:]
            next_token = self.generate_next_token(context)
            generated.append(next_token)

            print(f"Step {i+1}: {' '.join(generated)}")

        final_text = ' '.join(generated)
        print(f"Final output: '{final_text}'")
        return final_text

In [19]:
# --- Generative Adversarial Networks (GANs) ---
class SimpleGAN:
    def __init__(self):
        self.name = "Simple GAN"
        print(f"=== {self.name} ===")
        print("Two networks competing: Generator vs Discriminator")

        # Simplified "real" data (simple patterns)
        self.real_data = [
            [1, 1, 0, 0],  # Pattern 1
            [0, 0, 1, 1],  # Pattern 2
            [1, 0, 1, 0],  # Pattern 3
        ]

        # Generator starts with random ability
        self.generator_skill = 0.1
        # Discriminator starts with random ability
        self.discriminator_skill = 0.1

        print(f"Real data patterns: {self.real_data}")
        print()

    def generator_create_fake(self):
        """Generator creates fake data"""
        if random.random() < self.generator_skill:
            # As generator improves, creates more realistic data
            fake_data = random.choice(self.real_data).copy()
            # Add some noise
            if random.random() < 0.3:
                fake_data[random.randint(0, 3)] = 1 - fake_data[random.randint(0, 3)]
        else:
            # Poor generator creates random data
            fake_data = [random.randint(0, 1) for _ in range(4)]

        print(f"Generator created: {fake_data}")
        return fake_data

    def discriminator_judge(self, data):
        """Discriminator tries to identify real vs fake"""
        is_real = data in self.real_data

        # Discriminator's judgment (improves over time)
        if random.random() < self.discriminator_skill:
            judgment = is_real  # Correct judgment
        else:
            judgment = not is_real  # Wrong judgment

        print(f"Discriminator judges {data} as {'REAL' if judgment else 'FAKE'}")
        print(f"Actual truth: {'REAL' if is_real else 'FAKE'}")

        return judgment, is_real

    def train_one_round(self):
        """One round of GAN training"""
        print(f"=== GAN TRAINING ROUND ===")

        # Generator creates fake data
        fake_data = self.generator_create_fake()

        # Discriminator judges fake data
        judgment, truth = self.discriminator_judge(fake_data)

        # Update skills based on performance
        if judgment == truth:
            # Discriminator was correct
            self.discriminator_skill = min(0.9, self.discriminator_skill + 0.1)
            print("Discriminator improved!")
        else:
            # Generator fooled discriminator
            self.generator_skill = min(0.9, self.generator_skill + 0.1)
            print("Generator improved!")

        print(f"Generator skill: {self.generator_skill:.1f}")
        print(f"Discriminator skill: {self.discriminator_skill:.1f}")
        print()

In [20]:
# --- Variational Autoencoders (VAE) ---
class SimpleVAE:
    def __init__(self):
        self.name = "Simple VAE"
        print(f"=== {self.name} ===")
        print("Encode data → Latent space → Decode data")

        # Sample data: simple 2D points
        self.data_points = [
            [1, 1], [1, 2], [2, 1], [2, 2],  # Cluster 1
            [5, 5], [5, 6], [6, 5], [6, 6]   # Cluster 2
        ]

        print(f"Training data: {self.data_points}")
        print()

    def encode_to_latent(self, data_point):
        """Encode data point to latent representation"""
        # Simplified encoding: compress 2D to 1D
        # Add some noise (VAE characteristic)
        latent = sum(data_point) / 2 + random.uniform(-0.5, 0.5)
        print(f"Encode {data_point} → latent: {latent:.2f}")
        return latent

    def decode_from_latent(self, latent):
        """Decode from latent space back to data space"""
        # Simplified decoding: expand 1D to 2D
        # Add reconstruction noise
        decoded = [
            latent + random.uniform(-0.5, 0.5),
            latent + random.uniform(-0.5, 0.5)
        ]
        print(f"Decode latent {latent:.2f} → {[round(x, 1) for x in decoded]}")
        return decoded

    def generate_new_sample(self):
        """Generate new sample by sampling latent space"""
        print("=== VAE GENERATION ===")

        # Sample from latent space
        sampled_latent = random.uniform(1, 6)  # Between our two clusters
        print(f"Sampled latent: {sampled_latent:.2f}")

        # Decode to get new data point
        generated = self.decode_from_latent(sampled_latent)
        print(f"Generated new sample: {[round(x, 1) for x in generated]}")
        return generated

In [21]:
# --- Diffusion Models ---
class SimpleDiffusion:
    def __init__(self):
        self.name = "Simple Diffusion Model"
        print(f"=== {self.name} ===")
        print("Forward: Add noise step by step")
        print("Reverse: Remove noise step by step")

        # Original clean data
        self.clean_data = [5, 5, 5, 5]  # Clean signal
        print(f"Clean data: {self.clean_data}")
        print()

    def add_noise_step(self, data, noise_level):
        """Add noise to data (forward diffusion)"""
        noisy_data = []
        for value in data:
            noise = random.uniform(-noise_level, noise_level)
            noisy_data.append(value + noise)

        print(f"Add noise (level {noise_level}): {[round(x, 1) for x in noisy_data]}")
        return noisy_data

    def remove_noise_step(self, noisy_data, noise_level):
        """Remove noise from data (reverse diffusion)"""
        # Simplified denoising: move towards expected clean value
        denoised_data = []
        for value in noisy_data:
            # Simple denoising: move towards center value (5)
            denoised = value + 0.3 * (5 - value)  # Move 30% towards clean value
            denoised_data.append(denoised)

        print(f"Remove noise: {[round(x, 1) for x in denoised_data]}")
        return denoised_data

    def diffusion_process(self):
        """Complete diffusion process: noise addition then removal"""
        print("=== FORWARD DIFFUSION (Adding Noise) ===")
        current_data = self.clean_data.copy()

        # Add noise in steps
        for step in range(1, 4):
            current_data = self.add_noise_step(current_data, step)

        print("\n=== REVERSE DIFFUSION (Removing Noise) ===")

        # Remove noise in steps
        for step in range(3, 0, -1):
            current_data = self.remove_noise_step(current_data, step)

        print(f"Final reconstructed: {[round(x, 1) for x in current_data]}")
        print(f"Original clean data: {self.clean_data}")
        return current_data

In [22]:
# Demo all generative models
print("=== EMERGING DIRECTIONS DEMO ===")

# LLM Demo
llm = SimpleLLM()
llm.generate_text("the cat", max_length=6)
print()

=== EMERGING DIRECTIONS DEMO ===
=== Simple LLM INITIALIZED ===
Vocabulary size: 10
Context window: 5

=== TEXT GENERATION ===
Prompt: 'the cat'
Tokenization: 'the cat' → ['the', 'cat']
Context: ['the', 'cat']
Attention: 'cat' → possible next: ['sits', 'runs', 'is', 'happy']
Generated token: sits
Step 1: the cat sits
Context: ['the', 'cat', 'sits']
Attention: 'sits' → possible next: ['on', 'the']
Generated token: the
Step 2: the cat sits the
Context: ['the', 'cat', 'sits', 'the']
Attention: 'the' → possible next: ['cat', 'dog', 'mat', 'park']
Generated token: mat
Step 3: the cat sits the mat
Context: ['the', 'cat', 'sits', 'the', 'mat']
Generated token: the
Step 4: the cat sits the mat the
Final output: 'the cat sits the mat the'



In [23]:
# GAN Demo
gan = SimpleGAN()
for round_num in range(3):
    print(f"Round {round_num + 1}:")
    gan.train_one_round()

=== Simple GAN ===
Two networks competing: Generator vs Discriminator
Real data patterns: [[1, 1, 0, 0], [0, 0, 1, 1], [1, 0, 1, 0]]

Round 1:
=== GAN TRAINING ROUND ===
Generator created: [1, 0, 1, 0]
Discriminator judges [1, 0, 1, 0] as FAKE
Actual truth: REAL
Generator improved!
Generator skill: 0.2
Discriminator skill: 0.1

Round 2:
=== GAN TRAINING ROUND ===
Generator created: [0, 1, 0, 1]
Discriminator judges [0, 1, 0, 1] as FAKE
Actual truth: FAKE
Discriminator improved!
Generator skill: 0.2
Discriminator skill: 0.2

Round 3:
=== GAN TRAINING ROUND ===
Generator created: [0, 0, 1, 1]
Discriminator judges [0, 0, 1, 1] as REAL
Actual truth: REAL
Discriminator improved!
Generator skill: 0.2
Discriminator skill: 0.3



In [24]:
# VAE Demo
vae = SimpleVAE()
for i in range(2):
    encoded = vae.encode_to_latent(vae.data_points[i])
    decoded = vae.decode_from_latent(encoded)
vae.generate_new_sample()
print()

=== Simple VAE ===
Encode data → Latent space → Decode data
Training data: [[1, 1], [1, 2], [2, 1], [2, 2], [5, 5], [5, 6], [6, 5], [6, 6]]

Encode [1, 1] → latent: 0.97
Decode latent 0.97 → [0.5, 1.3]
Encode [1, 2] → latent: 1.46
Decode latent 1.46 → [1.1, 1.1]
=== VAE GENERATION ===
Sampled latent: 4.73
Decode latent 4.73 → [5.1, 4.4]
Generated new sample: [5.1, 4.4]



In [25]:
# Diffusion Demo
diffusion = SimpleDiffusion()
diffusion.diffusion_process()
print()

=== Simple Diffusion Model ===
Forward: Add noise step by step
Reverse: Remove noise step by step
Clean data: [5, 5, 5, 5]

=== FORWARD DIFFUSION (Adding Noise) ===
Add noise (level 1): [5.8, 4.9, 5.5, 6.0]
Add noise (level 2): [4.0, 6.4, 6.9, 5.1]
Add noise (level 3): [2.1, 4.0, 8.7, 8.0]

=== REVERSE DIFFUSION (Removing Noise) ===
Remove noise: [3.0, 4.3, 7.6, 7.1]
Remove noise: [3.6, 4.5, 6.8, 6.5]
Remove noise: [4.0, 4.7, 6.3, 6.0]
Final reconstructed: [4.0, 4.7, 6.3, 6.0]
Original clean data: [5, 5, 5, 5]



#### limitation of frontier AI system

In [26]:
class AILimitationsDemo:
    """Demonstrates key limitations of current AI systems"""

    def __init__(self):
        print("=== AI LIMITATIONS DEMONSTRATION ===")
        print()

    def demonstrate_hallucinations(self):
        print("=== HALLUCINATION PROBLEM ===")
        print()

        # Simulate AI knowledge base with gaps
        knowledge_base = {
            "Paris": "Capital of France, population 2.1 million",
            "London": "Capital of UK, population 8.9 million"
        }

        queries = ["Paris", "Tokyo", "Atlantis"]

        for query in queries:
            print(f"Query: What do you know about {query}?")

            if query in knowledge_base:
                print(f"AI Response: {knowledge_base[query]}")
                print("Status: ✓ Accurate")
            else:
                # AI hallucinates when it doesn't know
                fake_facts = [
                    f"{query} is a major city with population 3.2 million",
                    f"{query} was founded in 1847 and is known for its architecture",
                    f"{query} has a famous university established in 1923"
                ]
                hallucination = random.choice(fake_facts)
                print(f"AI Response: {hallucination}")
                print("Status: ❌ HALLUCINATION (False but confident)")
            print()

    def demonstrate_generalization_issues(self):
        """Shows how AI struggles with out-of-distribution examples"""
        print("=== GENERALIZATION PROBLEM ===")
        print("AI fails on examples outside training distribution")
        print()

        # Simulate training on specific patterns
        training_patterns = [
            {"input": [1, 2], "output": 3, "rule": "sum"},
            {"input": [2, 3], "output": 5, "rule": "sum"},
            {"input": [3, 4], "output": 7, "rule": "sum"},
        ]

        print("Training examples:")
        for pattern in training_patterns:
            print(f"  {pattern['input']} → {pattern['output']}")

        print("AI learns: Output = Input1 + Input2")
        print()

        # Test cases
        test_cases = [
            {"input": [4, 5], "expected": 9, "type": "similar"},
            {"input": [100, 200], "expected": 300, "type": "different scale"},
            {"input": [-1, -2], "expected": -3, "type": "negative numbers"},
            {"input": [1.5, 2.5], "expected": 4.0, "type": "decimals"}
        ]

        print("Test results:")
        for test in test_cases:
            actual = sum(test["input"])  # Perfect AI would get this right

            if test["type"] == "similar":
                # AI does well on similar examples
                predicted = actual
                success = True
            else:
                # AI struggles with different distributions
                predicted = actual + random.uniform(-2, 2)  # Add error
                success = abs(predicted - test["expected"]) < 0.1

            print(f"  {test['input']} → Predicted: {predicted:.1f}, Expected: {test['expected']}")
            print(f"    Type: {test['type']} - {'✓ Success' if success else '❌ Failed'}")
        print()

    def demonstrate_uncertainty_issues(self):
        """Shows how AI struggles with uncertainty quantification"""
        print("=== UNCERTAINTY PROBLEM ===")
        print("AI often can't express how confident it is")
        print()

        # Simulate different confidence scenarios
        scenarios = [
            {"question": "What is 2+2?", "confidence": 0.99, "correct": True},
            {"question": "Will it rain tomorrow?", "confidence": 0.95, "correct": False},
            {"question": "What is the capital of Atlantis?", "confidence": 0.90, "correct": False}
        ]

        print("AI Confidence vs Reality:")
        for scenario in scenarios:
            print(f"Question: {scenario['question']}")
            print(f"AI Confidence: {scenario['confidence']*100:.0f}%")
            print(f"Actually Correct: {'Yes' if scenario['correct'] else 'No'}")

            if scenario['confidence'] > 0.8 and not scenario['correct']:
                print("❌ Problem: High confidence but wrong!")
            elif scenario['confidence'] < 0.6 and scenario['correct']:
                print("⚠️  Problem: Low confidence but actually right!")
            else:
                print("✓ Confidence matches correctness")
            print()

        print("Key Issue: AI should say 'I don't know' more often!")
        print()

#### ethical challenges in AI system

In [27]:
class AIEthicsDemo:
    """Demonstrates key ethical challenges in AI deployment"""

    def __init__(self):
        print("=== AI ETHICS CHALLENGES ===")
        print()

    def demonstrate_bias_problem(self):
        """Shows how AI can perpetuate societal biases"""
        print("=== BIAS PROBLEM ===")
        print("AI systems can amplify human biases from training data")
        print()

        # Simulate biased training data
        hiring_data = [
            {"name": "John", "gender": "M", "degree": "CS", "hired": True},
            {"name": "Mike", "gender": "M", "degree": "CS", "hired": True},
            {"name": "Sarah", "gender": "F", "degree": "CS", "hired": False},
            {"name": "Anna", "gender": "F", "degree": "CS", "hired": False},
            {"name": "David", "gender": "M", "degree": "Math", "hired": True},
            {"name": "Lisa", "gender": "F", "degree": "Math", "hired": False}
        ]

        print("Historical hiring data (biased):")
        for record in hiring_data:
            print(f"  {record['name']} ({record['gender']}, {record['degree']}) → {'Hired' if record['hired'] else 'Rejected'}")

        # AI learns biased pattern
        male_hire_rate = sum(1 for r in hiring_data if r['gender'] == 'M' and r['hired']) / sum(1 for r in hiring_data if r['gender'] == 'M')
        female_hire_rate = sum(1 for r in hiring_data if r['gender'] == 'F' and r['hired']) / sum(1 for r in hiring_data if r['gender'] == 'F')

        print(f"\nAI learned pattern:")
        print(f"  Male hire rate: {male_hire_rate*100:.0f}%")
        print(f"  Female hire rate: {female_hire_rate*100:.0f}%")
        print("❌ Problem: AI perpetuates gender bias!")
        print()

        # Show impact on new candidates
        new_candidates = [
            {"name": "Alex", "gender": "M", "degree": "CS"},
            {"name": "Emma", "gender": "F", "degree": "CS"}
        ]

        print("New candidate predictions:")
        for candidate in new_candidates:
            if candidate['gender'] == 'M':
                prediction = "Likely to be hired" if male_hire_rate > 0.5 else "Likely to be rejected"
            else:
                prediction = "Likely to be hired" if female_hire_rate > 0.5 else "Likely to be rejected"

            print(f"  {candidate['name']} ({candidate['gender']}) → {prediction}")

        print("❌ Bias amplified: Equally qualified candidates treated differently!")
        print()

    def demonstrate_privacy_concerns(self):
        """Shows privacy issues with AI systems"""
        print("=== PRIVACY CONCERNS ===")
        print("AI systems can infer sensitive information")
        print()

        # Simulate user data
        user_activities = [
            {"user": "User1", "activity": "Searches for diabetes symptoms"},
            {"user": "User1", "activity": "Visits pharmacy websites"},
            {"user": "User1", "activity": "Reads insurance articles"},
            {"user": "User2", "activity": "Looks up job interview tips"},
            {"user": "User2", "activity": "Updates resume"},
            {"user": "User2", "activity": "Checks competitor salaries"}
        ]

        print("User activity data:")
        for activity in user_activities:
            print(f"  {activity['user']}: {activity['activity']}")

        print("\nAI inferences:")

        # AI can infer sensitive information
        user1_activities = [a['activity'] for a in user_activities if a['user'] == 'User1']
        user2_activities = [a['activity'] for a in user_activities if a['user'] == 'User2']

        print("User1 pattern analysis:")
        print("  → Likely has health concerns (diabetes)")
        print("  → May be seeking insurance")
        print("  ❌ Privacy risk: Health information inferred!")
        print()

        print("User2 pattern analysis:")
        print("  → Likely job searching")
        print("  → May be dissatisfied with current job")
        print("  ❌ Privacy risk: Employment status inferred!")
        print()

    def demonstrate_accountability_problem(self):
        """Shows accountability challenges with AI decisions"""
        print("=== ACCOUNTABILITY PROBLEM ===")
        print("Who is responsible when AI makes harmful decisions?")
        print()

        # Simulate AI decision-making chain
        decision_chain = [
            {"actor": "Data Scientists", "role": "Collected training data"},
            {"actor": "ML Engineers", "role": "Built the model"},
            {"actor": "Product Team", "role": "Deployed the system"},
            {"actor": "AI System", "role": "Made the decision"},
            {"actor": "Company", "role": "Owns the system"}
        ]

        print("AI Decision Chain:")
        for link in decision_chain:
            print(f"  {link['actor']}: {link['role']}")

        print("\nScenario: AI loan system denies loan to qualified applicant")
        print("\nAccountability questions:")
        print("  • Data Scientists: 'We just collected available data'")
        print("  • ML Engineers: 'We optimized for accuracy metrics'")
        print("  • Product Team: 'We followed standard deployment process'")
        print("  • AI System: 'I processed inputs according to training'")
        print("  • Company: 'We used industry-standard AI practices'")
        print()
        print("❌ Problem: Everyone has plausible deniability!")
        print("❌ Result: No clear accountability for harmful outcomes")
        print()


In [28]:
# Demo all limitations and ethics
limitations_demo = AILimitationsDemo()
limitations_demo.demonstrate_hallucinations()
limitations_demo.demonstrate_generalization_issues()
limitations_demo.demonstrate_uncertainty_issues()

=== AI LIMITATIONS DEMONSTRATION ===

=== HALLUCINATION PROBLEM ===
AI generates false information with high confidence

Query: What do you know about Paris?
AI Response: Capital of France, population 2.1 million
Status: ✓ Accurate

Query: What do you know about Tokyo?
AI Response: Tokyo was founded in 1847 and is known for its architecture
Status: ❌ HALLUCINATION (False but confident)

Query: What do you know about Atlantis?
AI Response: Atlantis is a major city with population 3.2 million
Status: ❌ HALLUCINATION (False but confident)

=== GENERALIZATION PROBLEM ===
AI fails on examples outside training distribution

Training examples:
  [1, 2] → 3
  [2, 3] → 5
  [3, 4] → 7
AI learns: Output = Input1 + Input2

Test results:
  [4, 5] → Predicted: 9.0, Expected: 9
    Type: similar - ✓ Success
  [100, 200] → Predicted: 300.1, Expected: 300
    Type: different scale - ❌ Failed
  [-1, -2] → Predicted: -3.4, Expected: -3
    Type: negative numbers - ❌ Failed
  [1.5, 2.5] → Predicted: 6.0, 

In [29]:
ethics_demo = AIEthicsDemo()
ethics_demo.demonstrate_bias_problem()
ethics_demo.demonstrate_privacy_concerns()
ethics_demo.demonstrate_accountability_problem()

=== AI ETHICS CHALLENGES ===

=== BIAS PROBLEM ===
AI systems can amplify human biases from training data

Historical hiring data (biased):
  John (M, CS) → Hired
  Mike (M, CS) → Hired
  Sarah (F, CS) → Rejected
  Anna (F, CS) → Rejected
  David (M, Math) → Hired
  Lisa (F, Math) → Rejected

AI learned pattern:
  Male hire rate: 100%
  Female hire rate: 0%
❌ Problem: AI perpetuates gender bias!

New candidate predictions:
  Alex (M) → Likely to be hired
  Emma (F) → Likely to be rejected
❌ Bias amplified: Equally qualified candidates treated differently!

=== PRIVACY CONCERNS ===
AI systems can infer sensitive information

User activity data:
  User1: Searches for diabetes symptoms
  User1: Visits pharmacy websites
  User1: Reads insurance articles
  User2: Looks up job interview tips
  User2: Updates resume
  User2: Checks competitor salaries

AI inferences:
User1 pattern analysis:
  → Likely has health concerns (diabetes)
  → May be seeking insurance
  ❌ Privacy risk: Health informa