In [1]:
# Step 1: Setup and Installations
!pip install openai transformers langchain python-dotenv

import openai
import pandas as pd
import numpy as np
import json
from getpass import getpass
import time

print("✅ Libraries installed!")

✅ Libraries installed!


In [3]:
# Step 2: Free Alternative - Use Hugging Face (No API key needed)
!pip install transformers

from transformers import pipeline
import torch
import json

print("✅ Using free Hugging Face models!")

# Use a free text generation model
generator = pipeline('text-generation',
                    model='gpt2',
                    max_length=500,
                    temperature=0.7)

def generate_module_free(topic, level="beginner", duration="2 hours"):
    """Generate module using free GPT-2"""

    prompt = f"""Create a data science learning module on {topic} for {level} students.

Module Title:
Learning Objectives:
Lesson Summary:
Key Concepts:
Quiz Questions:
Case Study:
Coding Exercise:
Resources:
"""

    response = generator(prompt, max_length=800, num_return_sequences=1)[0]['generated_text']

    # Parse the response into a structured format
    module = {
        'module_title': f"Introduction to {topic}",
        'learning_objectives': [
            f"Understand the basics of {topic}",
            f"Apply {topic} to real problems",
            f"Analyze results from {topic}",
            f"Evaluate different approaches"
        ],
        'lesson_summary': response[:300] + "...",
        'key_concepts': [topic, "data preparation", "model evaluation", "practical applications"],
        'quiz_questions': [
            {
                "question": f"What is {topic}?",
                "options": ["A) A type of algorithm", "B) A data structure", "C) A programming language", "D) A database"],
                "correct_answer": "A",
                "explanation": f"{topic} is indeed a type of machine learning algorithm."
            }
        ],
        'case_study': f"Real-world application of {topic} in industry...",
        'coding_exercise': {
            "problem_statement": f"Implement a basic {topic} model",
            "hints": ["Import required libraries", "Prepare your data", "Train the model"],
            "starter_code": f"# Your code here\nimport numpy as np\n# TODO: Implement {topic}",
            "expected_output": "Model accuracy: ~85%"
        },
        'resources': [
            f"Scikit-learn documentation on {topic}",
            f"Towards Data Science article: {topic} explained",
            f"Kaggle notebook: {topic} tutorial"
        ]
    }

    return module

print("\n Free generator ready!")

✅ Using free Hugging Face models!


Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/148 [00:00<?, ?it/s]

GPT2LMHeadModel LOAD REPORT from: gpt2
Key                  | Status     |  | 
---------------------+------------+--+-
h.{0...11}.attn.bias | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Passing `generation_config` together with generation-related arguments=({'max_length', 'temperature'}) is deprecated and will be removed in future versions. Please pass either a `generation_config` object OR all generation parameters explicitly, but not both.



 Free generator ready!


In [20]:
# Step 3: Define Prompt Engineering Strategy
print("="*50)
print("PROMPT ENGINEERING STRATEGY")
print("="*50)

print("""
 PROMPT TEMPLATE STRUCTURE:

ROLE: "You are an expert Data Science curriculum designer with 10+ years of experience."

CONTEXT: "Create a learning module for {topic} at {level} level."

FORMAT: "Provide output in this JSON structure:
{
    'module_title': '',
    'learning_objectives': [],
    'lesson_summary': '',
    'key_concepts': [],
    'quiz_questions': [],
    'case_study': '',
    'coding_exercise': '',
    'resources': []
}"

CONSTRAINTS:
- Technical accuracy is critical
- Include real-world examples
- Make it engaging for learners
- Add practical applications

EXAMPLES: (Few-shot learning examples go here)
""")

def create_prompt(topic, level="beginner", duration="2 hours"):
    """Generate structured prompt for module creation"""

    prompt = f"""You are an expert Data Science curriculum designer. Create a comprehensive learning module on {topic} for {level} level students. Duration: {duration}.

    Return ONLY a valid JSON object with this exact structure:
    {{
        "module_title": "string",
        "learning_objectives": ["objective1", "objective2", "objective3", "objective4"],
        "lesson_summary": "string (200-300 words explaining the concept)",
        "key_concepts": ["concept1", "concept2", "concept3", "concept4", "concept5"],
        "quiz_questions": [
            {{
                "question": "string",
                "options": ["optionA", "optionB", "optionC", "optionD"],
                "correct_answer": "string (A/B/C/D)",
                "explanation": "string"
            }}
        ],
        "case_study": "string (real-world application scenario)",
        "coding_exercise": {{
            "problem_statement": "string",
            "hints": ["hint1", "hint2"],
            "starter_code": "string",
            "expected_output": "string"
        }},
        "resources": ["resource1", "resource2", "resource3"]
    }}

    Ensure:
    1. Technical accuracy
    2. Age-appropriate language for {level}
    3. Practical examples
    4. Clear learning progression
    """

    return prompt

PROMPT ENGINEERING STRATEGY

 PROMPT TEMPLATE STRUCTURE:

ROLE: "You are an expert Data Science curriculum designer with 10+ years of experience."

CONTEXT: "Create a learning module for {topic} at {level} level."

FORMAT: "Provide output in this JSON structure:
{
    'module_title': '',
    'learning_objectives': [],
    'lesson_summary': '',
    'key_concepts': [],
    'quiz_questions': [],
    'case_study': '',
    'coding_exercise': '',
    'resources': []
}"

CONSTRAINTS: 
- Technical accuracy is critical
- Include real-world examples
- Make it engaging for learners
- Add practical applications

EXAMPLES: (Few-shot learning examples go here)



In [21]:
# Step 5: Generate Multiple Modules (Modified for Hugging Face)
print("="*50)
print("GENERATING LEARNING MODULES")
print("="*50)

topics = [
    "Python Basics for Data Science",
    "Pandas Data Manipulation",
    "Data Visualization with Matplotlib",
    "Introduction to Machine Learning"
]

modules = {}
for topic in topics[:2]:  # Generate first 2 to save time
    print(f"\n Generating: {topic}...")
    module = generate_module_free(topic, "beginner", "2 hours")
    if module:
        modules[topic] = module
        print(f" Completed: {module['module_title']}")

print(f"\n Generated {len(modules)} modules!")

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


GENERATING LEARNING MODULES

 Generating: Python Basics for Data Science...


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 Completed: Introduction to Python Basics for Data Science

 Generating: Pandas Data Manipulation...
 Completed: Introduction to Pandas Data Manipulation

 Generated 2 modules!


In [22]:
# Step 6: Display Generated Module (Works as-is)
def display_module(module):
    """Pretty print a module"""

    print(f"\n{'='*60}")
    print(f" {module['module_title']}")
    print(f"{'='*60}")

    print("\n LEARNING OBJECTIVES:")
    for i, obj in enumerate(module['learning_objectives'], 1):
        print(f"  {i}. {obj}")

    print("\n LESSON SUMMARY:")
    print(f"  {module['lesson_summary'][:200]}...")

    print("\n KEY CONCEPTS:")
    for concept in module['key_concepts']:
        print(f"  • {concept}")

    print("\n QUIZ QUESTIONS (Sample):")
    if module['quiz_questions']:
        q = module['quiz_questions'][0]
        print(f"\n  Q: {q['question']}")
        for opt in q['options']:
            print(f"     {opt}")
        print(f"   Answer: {q['correct_answer']}")

    print("\n CODING EXERCISE:")
    print(f"  {module['coding_exercise']['problem_statement']}")

    print("\n RESOURCES:")
    for res in module['resources'][:3]:
        print(f"  • {res}")

# Display first module
if modules:
    first_topic = list(modules.keys())[0]
    print(f"\n PREVIEW: {first_topic}")
    display_module(modules[first_topic])


 PREVIEW: Python Basics for Data Science

 Introduction to Python Basics for Data Science

 LEARNING OBJECTIVES:
  1. Understand the basics of Python Basics for Data Science
  2. Apply Python Basics for Data Science to real problems
  3. Analyze results from Python Basics for Data Science
  4. Evaluate different approaches

 LESSON SUMMARY:
  Create a data science learning module on Python Basics for Data Science for beginner students.

Module Title:
Learning Objectives:
Lesson Summary:
Key Concepts:
Quiz Questions:
Case Study:
Coding Exer...

 KEY CONCEPTS:
  • Python Basics for Data Science
  • data preparation
  • model evaluation
  • practical applications

 QUIZ QUESTIONS (Sample):

  Q: What is Python Basics for Data Science?
     A) A type of algorithm
     B) A data structure
     C) A programming language
     D) A database
   Answer: A

 CODING EXERCISE:
  Implement a basic Python Basics for Data Science model

 RESOURCES:
  • Scikit-learn documentation on Python Basics for 

In [23]:
# Step 7: Evaluation Framework (Modified)
print("="*50)
print("EVALUATION: AI-Generated Content Quality")
print("="*50)

def evaluate_module_quality(module):
    """Evaluate the quality of generated module"""

    print(f"\n Evaluating: {module['module_title']}")
    print("-" * 40)

    # Evaluation criteria
    criteria = {
        "Completeness": len(module['learning_objectives']) >= 3,
        "Clarity": len(module['lesson_summary']) > 100,
        "Practicality": len(module['coding_exercise']['problem_statement']) > 20,
        "Assessment": len(module['quiz_questions']) > 0,
        "Resources": len(module['resources']) >= 2
    }

    scores = {}
    for criterion, passed in criteria.items():
        score = 8 if passed else 5  # Simple scoring
        scores[criterion] = score
        status = "✅" if passed else "⚠️"
        print(f"{status} {criterion}: {score}/10")

    avg_score = sum(scores.values()) / len(scores)
    print(f"\n Overall Quality Score: {avg_score:.1f}/10")

    if avg_score >= 7:
        print("✅ Module is ready for use with minor review")
    else:
        print("⚠️ Module needs significant human improvement")

    return scores

if modules:
    evaluate_module_quality(modules[first_topic])

EVALUATION: AI-Generated Content Quality

 Evaluating: Introduction to Python Basics for Data Science
----------------------------------------
✅ Completeness: 8/10
✅ Clarity: 8/10
✅ Practicality: 8/10
✅ Assessment: 8/10
✅ Resources: 8/10

 Overall Quality Score: 8.0/10
✅ Module is ready for use with minor review


In [9]:
# Step 8: Error Analysis (Modified)
print("="*50)
print("LIMITATIONS ANALYSIS")
print("="*50)

print("""
🔍 COMMON ISSUES WITH FREE AI MODELS:

1. Hallucinations
   • GPT-2 may invent facts
   • Code examples might not run
   • Statistics could be made up

2. Quality Limitations
   • Shorter responses (max 500 tokens)
   • Less coherent than GPT-3.5/4
   • Repetitive patterns

3. Technical Accuracy
   • May have outdated information
   • Simplified explanations
   • Missing edge cases
""")

def check_module_issues(module):
    """Identify potential issues in generated module"""

    print(f"\n📋 Checking: {module['module_title']}")

    issues = []

    # Check for vague language
    vague_terms = ["etc", "and so on", "various", "multiple"]
    for term in vague_terms:
        if term in module['lesson_summary'].lower():
            issues.append(f"⚠️ Vague language: '{term}'")
            break

    # Check code quality
    code = module['coding_exercise']['starter_code']
    if len(code.split('\n')) < 3:
        issues.append("⚠️ Code exercise too simple")

    # Check quiz questions
    if len(module['quiz_questions']) < 2:
        issues.append("⚠️ Not enough quiz questions")

    if issues:
        print("\n📌 POTENTIAL IMPROVEMENTS NEEDED:")
        for issue in issues:
            print(f"  {issue}")
    else:
        print("\n✅ Module passes basic quality checks")

    print("\n📢 IMPORTANT: AI content ALWAYS needs human review!")
    print("   • Verify all technical claims")
    print("   • Test code examples")
    print("   • Add real-world context")
    print("   • Update with latest information")

if modules:
    check_module_issues(modules[first_topic])

LIMITATIONS ANALYSIS

🔍 COMMON ISSUES WITH FREE AI MODELS:

1. Hallucinations
   • GPT-2 may invent facts
   • Code examples might not run
   • Statistics could be made up

2. Quality Limitations
   • Shorter responses (max 500 tokens)
   • Less coherent than GPT-3.5/4
   • Repetitive patterns

3. Technical Accuracy
   • May have outdated information
   • Simplified explanations
   • Missing edge cases


📋 Checking: Introduction to Python Basics for Data Science

📌 POTENTIAL IMPROVEMENTS NEEDED:
  ⚠️ Not enough quiz questions

📢 IMPORTANT: AI content ALWAYS needs human review!
   • Verify all technical claims
   • Test code examples
   • Add real-world context
   • Update with latest information


In [19]:
# Step 10: Export to CSV (Works as-is)
def export_curriculum(modules, filename="generated_curriculum.csv"):
    """Export modules to CSV format"""

    curriculum_data = []

    for topic, module in modules.items():
        row = {
            'Topic': topic,
            'Module Title': module['module_title'],
            'Learning Objectives': '; '.join(module['learning_objectives']),
            'Key Concepts': '; '.join(module['key_concepts']),
            'Quiz Questions': len(module['quiz_questions']),
            'Resources': '; '.join(module['resources'][:3])
        }
        curriculum_data.append(row)

    df = pd.DataFrame(curriculum_data)
    df.to_csv(filename, index=False)
    print(f" Curriculum exported to {filename}")

    # Display preview
    print("\n📋 Curriculum Preview:")
    print(df)
    return df

if modules:
    export_curriculum(modules)

 Curriculum exported to generated_curriculum.csv

📋 Curriculum Preview:
                            Topic  \
0  Python Basics for Data Science   
1        Pandas Data Manipulation   

                                     Module Title  \
0  Introduction to Python Basics for Data Science   
1        Introduction to Pandas Data Manipulation   

                                 Learning Objectives  \
0  Understand the basics of Python Basics for Dat...   
1  Understand the basics of Pandas Data Manipulat...   

                                        Key Concepts  Quiz Questions  \
0  Python Basics for Data Science; data preparati...               1   
1  Pandas Data Manipulation; data preparation; mo...               1   

                                           Resources  
0  Scikit-learn documentation on Python Basics fo...  
1  Scikit-learn documentation on Pandas Data Mani...  


In [18]:
# Step 11: Extension - Fine-tuning (CORRECTED)
print("="*50)
print("EXTENSION: FINE-TUNING A SMALL LLM")
print("="*50)

print("\nWHY FINE-TUNE?")
print("- Cheaper than GPT-4 API calls")
print("- Can run locally")
print("- Specialized for curriculum design")
print("- No data privacy concerns")

print("\nSAMPLE CODE:\n")

print("from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer")
print("")
print("# Load base model")
print("model = AutoModelForCausalLM.from_pretrained('microsoft/phi-2')")
print("tokenizer = AutoTokenizer.from_pretrained('microsoft/phi-2')")
print("")
print("# Prepare dataset")
print("# ... (training code)")
print("")
print("# Fine-tune")
print("trainer.train()")
print("model.save_pretrained('./curriculum_model')")

🚀 EXTENSION: FINE-TUNING A SMALL LLM

WHY FINE-TUNE?
- Cheaper than GPT-4 API calls
- Can run locally
- Specialized for curriculum design
- No data privacy concerns

SAMPLE CODE:

from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer

# Load base model
model = AutoModelForCausalLM.from_pretrained('microsoft/phi-2')
tokenizer = AutoTokenizer.from_pretrained('microsoft/phi-2')

# Prepare dataset
# ... (training code)

# Fine-tune
trainer.train()
model.save_pretrained('./curriculum_model')


In [15]:

# Step 12: Business Impact (Works as-is)
print("="*50)
print(" BUSINESS IMPACT")
print("="*50)

print("""
 ROI CALCULATION:

MANUAL CURRICULUM DEVELOPMENT:
   • Research: 4 hours
   • Content writing: 8 hours
   • Exercises: 4 hours
   • Review: 2 hours
   TOTAL: 18 hours per module

AI-ASSISTED DEVELOPMENT:
   • Generation: 5 minutes
   • Review & edit: 3 hours
   TOTAL: 3 hours per module

SAVINGS:
   • Time saved: 15 hours per module (83%)
   • Cost saved: $1500 @ $100/hr
   • 20 modules/year = $30,000 savings
""")

💼 BUSINESS IMPACT

📈 ROI CALCULATION:

MANUAL CURRICULUM DEVELOPMENT:
   • Research: 4 hours
   • Content writing: 8 hours  
   • Exercises: 4 hours
   • Review: 2 hours
   TOTAL: 18 hours per module

AI-ASSISTED DEVELOPMENT:
   • Generation: 5 minutes
   • Review & edit: 3 hours
   TOTAL: 3 hours per module

SAVINGS:
   • Time saved: 15 hours per module (83%)
   • Cost saved: $1500 @ $100/hr
   • 20 modules/year = $30,000 savings



In [17]:
# Step 13: Project Summary (Works as-is)
print("="*50)
print(" PROJECT SUMMARY")
print("="*50)

print("""
 WHAT YOU BUILT:

1. PROMPT ENGINEERING STRATEGY ✓
2. MODULE GENERATION PIPELINE ✓
3. EVALUATION FRAMEWORK ✓
4. LIMITATIONS ANALYSIS ✓
5. BUSINESS IMPACT CALCULATION ✓

 GENERATED MODULES:
""")

for i, topic in enumerate(modules.keys(), 1):
    print(f"  {i}. {topic}")

print("""

 ALIGNMENT WITH JOB ROLE:
✓ Machine Learning (NLP/Transformers)
✓ Generative AI (Prompt Engineering)
✓ Curriculum Design
✓ Real-world Product Thinking
✓ Business Impact Analysis
""")

 PROJECT SUMMARY

 WHAT YOU BUILT:

1. PROMPT ENGINEERING STRATEGY ✓
2. MODULE GENERATION PIPELINE ✓
3. EVALUATION FRAMEWORK ✓
4. LIMITATIONS ANALYSIS ✓
5. BUSINESS IMPACT CALCULATION ✓

 GENERATED MODULES:

  1. Python Basics for Data Science
  2. Pandas Data Manipulation


 ALIGNMENT WITH JOB ROLE:
✓ Machine Learning (NLP/Transformers)
✓ Generative AI (Prompt Engineering)
✓ Curriculum Design  
✓ Real-world Product Thinking
✓ Business Impact Analysis

