## 1. Setup & Imports

In [None]:
import sys
# Put the project checkout first on the module search path.
# NOTE(review): this is a hard-coded absolute path to one user's home
# directory, so the notebook will not run unchanged on another machine.
# Presumably it exists so the `mlops` package imported below can be found --
# confirm, and consider deriving it from the notebook location instead.
sys.path.insert(0, '/Users/hp/Documents/Akulearn_docs')

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime, timedelta
import json

# Import our content generator
from mlops.exam_content_generator import (
    ExamContentOrchestrator,
    GenerationRequest,
    ExamBoard,
    Difficulty,
)

# Confirmation that every import in this cell resolved. The check mark is
# written as the real U+2713 character rather than mis-decoded UTF-8 bytes.
print("✓ All imports successful!")

## 2. Generate Content for Different Exam Boards

In [None]:
# Create the orchestrator once; the NECO and JAMB cells reuse this instance.
orchestrator = ExamContentOrchestrator()

# Describe the WAEC batch: 15 medium-difficulty algebra questions.
waec_request = GenerationRequest(
    exam_board=ExamBoard.WAEC,
    subject="mathematics",
    topic="algebra",
    difficulty=Difficulty.MEDIUM,
    question_count=15
)

# Status messages use the real emoji / check-mark characters instead of
# mis-decoded UTF-8 bytes so they render correctly in the notebook output.
print("\n📝 Generating WAEC Mathematics Content...")
waec_result = orchestrator.generate_content_batch(waec_request)
# The result is read through its 'generated' and 'validated' entries.
print(f"✓ Generated {len(waec_result['generated'])} questions")
print(f"✓ {len(waec_result['validated'])} questions passed validation")

In [None]:
# Describe the NECO batch: 12 easy biology questions on photosynthesis.
neco_request = GenerationRequest(
    exam_board=ExamBoard.NECO,
    subject="biology",
    topic="photosynthesis",
    difficulty=Difficulty.EASY,
    question_count=12
)

# Status messages use the real emoji / check-mark characters instead of
# mis-decoded UTF-8 bytes so they render correctly in the notebook output.
print("\n📝 Generating NECO Biology Content...")
neco_result = orchestrator.generate_content_batch(neco_request)
# The result is read through its 'generated' and 'validated' entries.
print(f"✓ Generated {len(neco_result['generated'])} questions")
print(f"✓ {len(neco_result['validated'])} questions passed validation")

In [None]:
# Describe the JAMB batch: 18 hard chemistry questions on the periodic table.
jamb_request = GenerationRequest(
    exam_board=ExamBoard.JAMB,
    subject="chemistry",
    topic="periodic_table",
    difficulty=Difficulty.HARD,
    question_count=18
)

# Status messages use the real emoji / check-mark characters instead of
# mis-decoded UTF-8 bytes so they render correctly in the notebook output.
print("\n📝 Generating JAMB Chemistry Content...")
jamb_result = orchestrator.generate_content_batch(jamb_request)
# The result is read through its 'generated' and 'validated' entries.
print(f"✓ Generated {len(jamb_result['generated'])} questions")
print(f"✓ {len(jamb_result['validated'])} questions passed validation")

## 3. Analyze Generation Statistics

In [None]:
# Pool the validated questions from the three exam-board batches.
all_questions = waec_result['validated'] + neco_result['validated'] + jamb_result['validated']

# Column layout of the analysis table. It is passed to the DataFrame
# constructor explicitly so that every column exists even when no question
# passed validation; without it an empty table has no columns and each
# df[...] lookup below would raise KeyError.
stats_columns = [
    'exam_board', 'subject', 'topic', 'difficulty',
    'quality_score', 'relevance_score', 'avg_score',
]
score_columns = ['quality_score', 'relevance_score', 'avg_score']

# One row per validated question; avg_score is the plain mean of the two scores.
data = [
    {
        'exam_board': q.exam_board.value.upper(),
        'subject': q.subject,
        'topic': q.topic,
        'difficulty': q.difficulty.value,
        'quality_score': q.quality_score,
        'relevance_score': q.relevance_score,
        'avg_score': (q.quality_score + q.relevance_score) / 2,
    }
    for q in all_questions
]

df = pd.DataFrame(data, columns=stats_columns)
# Keep the score columns numeric even for an empty table (which would
# otherwise be object dtype) so .mean() yields NaN instead of failing.
df[score_columns] = df[score_columns].astype(float)

print("\n📊 CONTENT GENERATION STATISTICS")
print("="*50)
print(f"Total Questions Generated: {len(df)}")
print("\nBy Exam Board:")
print(df['exam_board'].value_counts())
print("\nBy Subject:")
print(df['subject'].value_counts())
print("\nBy Difficulty:")
print(df['difficulty'].value_counts())
print("\nQuality Metrics:")
print(f"  Avg Quality Score:   {df['quality_score'].mean():.3f}")
print(f"  Avg Relevance Score: {df['relevance_score'].mean():.3f}")
print(f"  Avg Overall Score:   {df['avg_score'].mean():.3f}")

## 4. Visualizations

In [None]:
# Chart 1: Distribution by Exam Board
# Build the two-column count table by naming the index and the values
# explicitly. Renaming the default 'count' / 'exam_board' columns only works
# with the column names pandas >= 2.0 produces; on older versions the count
# column would be missing and the chart would fail.
board_counts = (
    df['exam_board']
    .value_counts()
    .rename_axis('Exam Board')
    .reset_index(name='Number of Questions')
)

fig1 = px.bar(
    board_counts,
    x='Exam Board',
    y='Number of Questions',
    color='Exam Board',
    title='Questions Generated by Exam Board',
    color_discrete_map={'WAEC': '#1f77b4', 'NECO': '#ff7f0e', 'JAMB': '#2ca02c'}
)
# The x axis already labels each bar, so the colour legend is hidden.
fig1.update_layout(showlegend=False, height=400)
fig1.show()

In [None]:
# Chart 2: Difficulty Distribution
# Fix the slice order and give absent levels a count of 0.
# NOTE(review): assumes Difficulty values are the lowercase strings below;
# any other label is dropped by the reindex -- confirm against the enum.
difficulty_order = ['easy', 'medium', 'hard']
difficulty_data = df['difficulty'].value_counts().reindex(difficulty_order, fill_value=0)

fig2 = px.pie(
    values=difficulty_data.values,
    names=difficulty_data.index,
    # color_discrete_map is keyed on the values of `color`; without this
    # argument the custom palette below is not applied to the slices.
    color=difficulty_data.index,
    title='Question Difficulty Distribution',
    color_discrete_map={'easy': '#90EE90', 'medium': '#FFD700', 'hard': '#FF6B6B'}
)
fig2.update_layout(height=400)
fig2.show()

In [None]:
# Chart 3: Quality Metrics by Exam Board
# Mean quality and relevance score per exam board, one row per board.
quality_by_board = (
    df.groupby('exam_board')[['quality_score', 'relevance_score']]
    .mean()
    .reset_index()
)

# One entry per subplot: (panel title, score column, trace name, bar colour).
panel_specs = (
    ("Average Quality Score", 'quality_score', 'Quality', '#1f77b4'),
    ("Average Relevance Score", 'relevance_score', 'Relevance', '#ff7f0e'),
)

fig3 = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=tuple(spec[0] for spec in panel_specs),
)

# Add the bar traces left to right, one per panel.
for panel_col, (_, score_column, trace_name, bar_color) in enumerate(panel_specs, start=1):
    bars = go.Bar(
        x=quality_by_board['exam_board'],
        y=quality_by_board[score_column],
        name=trace_name,
        marker_color=bar_color,
    )
    fig3.add_trace(bars, row=1, col=panel_col)

fig3.update_layout(title_text="Quality Metrics by Exam Board", height=400, showlegend=False)
# Both panels share the same fixed 0-1 y range so the bars are comparable.
fig3.update_yaxes(range=[0, 1])
fig3.show()

In [None]:
# Chart 4: Subject Distribution
# Build the two-column count table by naming the index and the values
# explicitly. Renaming the default 'count' / 'subject' columns only works
# with the column names pandas >= 2.0 produces; on older versions the count
# column would be missing and the chart would fail.
subject_counts = (
    df['subject']
    .value_counts()
    .rename_axis('Subject')
    .reset_index(name='Count')
)

fig4 = px.bar(
    subject_counts,
    x='Subject',
    y='Count',
    title='Questions by Subject',
    color='Subject'
)
# The x axis already labels each bar, so the colour legend is hidden.
fig4.update_layout(height=400, showlegend=False)
fig4.show()

In [None]:
# Chart 5: Scatter plot - Quality vs Relevance
# One marker per question: position from the two scores, colour from the
# exam board, marker size from the averaged score.
board_palette = {'WAEC': '#1f77b4', 'NECO': '#ff7f0e', 'JAMB': '#2ca02c'}
axis_titles = {'quality_score': 'Quality Score', 'relevance_score': 'Relevance Score'}

scatter_options = dict(
    x='quality_score',
    y='relevance_score',
    color='exam_board',
    size='avg_score',
    hover_data=['difficulty'],  # shown in the hover tooltip
    title='Quality vs Relevance Score Analysis',
    labels=axis_titles,
    color_discrete_map=board_palette,
)

fig5 = px.scatter(df, **scatter_options)
fig5.update_layout(height=500)
fig5.show()

## 5. Export Generated Content

In [None]:
# Export all validated questions to JSON; the orchestrator returns the
# location that is echoed below.
output_path = orchestrator.export_to_json(
    all_questions,
    'runs/exam_content_batch.json'
)

print(f"\n✓ Exported {len(all_questions)} questions to: {output_path}")

# Preview the first question. Guarded because indexing an empty list raises
# IndexError when no question passed validation.
if all_questions:
    sample_question = all_questions[0]
    print("\n📌 Sample Question:")
    print(f"  Exam Board: {sample_question.exam_board.value.upper()}")
    print(f"  Subject: {sample_question.subject}")
    print(f"  Topic: {sample_question.topic}")
    print(f"  Difficulty: {sample_question.difficulty.value}")
    print(f"\n  Question: {sample_question.question_text}")
    print("\n  Options:")
    # Options are numbered from 1 for display.
    for i, opt in enumerate(sample_question.options, 1):
        print(f"    {i}. {opt}")
    print(f"\n  Correct Answer: {sample_question.correct_answer}")
    print(f"  Quality Score: {sample_question.quality_score:.3f}")
    print(f"  Relevance Score: {sample_question.relevance_score:.3f}")
else:
    print("\nNo validated questions available to preview.")

## 6. Next Steps: Integration with Google Tools

### NotebookLM Integration
- Upload textbooks/study materials to NotebookLM
- Generate audio study guides for selected topics
- Embed audio URLs in the quiz app

### Google AI Studio Integration
- Test improved prompts for question generation
- Refine question templates
- A/B test explanation styles

### Hugging Face Hub
- Fine-tune models on exam-specific data
- Use better-performing models for generation
- Deploy via Hugging Face Inference API

In [None]:
print("\nüéØ SUMMARY")
print("="*60)
print(f"Total questions generated: {len(all_questions)}")
print(f"Average quality score: {df['quality_score'].mean():.3f}/1.0")
print(f"Average relevance score: {df['relevance_score'].mean():.3f}/1.0")
print(f"\nBy exam board:")
print(f"  WAEC:  {len(waec_result['validated'])} questions")
print(f"  NECO:  {len(neco_result['validated'])} questions")
print(f"  JAMB:  {len(jamb_result['validated'])} questions")
print(f"\nNext: Train models, integrate with Google tools, scale to production")
print("="*60)