In [0]:
-- =====================================================
-- Day 3: Analysis Queries & Dashboard Preparation
-- OULAD Education Analytics Project - Updated Version
-- =====================================================

-- Set database context: make eduanalytics the current database/schema.
-- The queries in this file also schema-qualify every object they reference
-- (eduanalytics.<object>), so they do not rely on this default alone.
USE eduanalytics;

-- Query 1: Overall Performance Distribution
-- One output row per final_result value: how many rows of the master view carry
-- that result and what share of all rows that is (percentage, 2 decimals).
-- No filter is applied, so a NULL final_result forms its own group.
WITH result_counts AS (
    -- Row count per final_result.
    SELECT
        final_result,
        COUNT(*) AS student_count
    FROM eduanalytics.v_student_analytics_master
    GROUP BY final_result
)
SELECT
    'Performance Distribution' AS analysis_type,
    final_result,
    student_count,
    -- Window SUM over all groups gives the grand total used as the denominator.
    ROUND(student_count * 100.0 / SUM(student_count) OVER (), 2) AS percentage
FROM result_counts
ORDER BY student_count DESC;

In [0]:
-- Query 2: Engagement vs Academic Performance
-- For every (engagement_level, final_result) combination: row count, mean
-- average_score and mean total_clicks. Rows without a final_result are excluded.
WITH rows_with_result AS (
    SELECT
        engagement_level,
        final_result,
        average_score,
        total_clicks
    FROM eduanalytics.v_student_analytics_master
    WHERE final_result IS NOT NULL
)
SELECT
    'Engagement vs Performance' AS analysis_type,
    engagement_level,
    final_result,
    COUNT(*) AS student_count,
    ROUND(AVG(average_score), 2) AS avg_assessment_score,
    ROUND(AVG(total_clicks), 2) AS avg_total_clicks
FROM rows_with_result
GROUP BY
    engagement_level,
    final_result
ORDER BY
    engagement_level,
    final_result;

In [0]:
-- Query 3: Demographics Impact Analysis
-- Per (gender, age_band): row count, mean clicks, mean score and success rate,
-- where success_rate is the percentage of rows whose final_result is
-- 'Pass' or 'Distinction'.
--
-- Deliberately filtered on final_result only. Also requiring
-- average_score IS NOT NULL would drop every row without a recorded score from
-- student_count and from the success_rate denominator, making these figures
-- incomparable with the module / education / attempts / disability breakdowns
-- in this file, which use the final_result filter alone. AVG() ignores NULLs,
-- so avg_score does not need that filter.
SELECT
    'Demographics Analysis' AS analysis_type,
    gender,
    age_band,
    COUNT(*) AS student_count,
    ROUND(AVG(total_clicks), 2) AS avg_clicks,
    ROUND(AVG(average_score), 2) AS avg_score,
    ROUND(SUM(CASE WHEN final_result IN ('Pass', 'Distinction') THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS success_rate
FROM eduanalytics.v_student_analytics_master
WHERE final_result IS NOT NULL
GROUP BY
    gender,
    age_band
-- gender / age_band break ties so equal success rates come back in a stable order.
ORDER BY
    success_rate DESC,
    gender,
    age_band;

In [0]:
-- Query 4: Module Performance Comparison
-- Per code_module: row count, mean clicks, mean score, success rate
-- (percentage of rows with final_result 'Pass' or 'Distinction') and mean
-- number of distinct resources accessed. Rows without a final_result are excluded.
WITH module_rows AS (
    SELECT
        code_module,
        total_clicks,
        average_score,
        unique_resources_accessed,
        -- 1 for a successful outcome, 0 otherwise; summed below to count successes.
        CASE WHEN final_result IN ('Pass', 'Distinction') THEN 1 ELSE 0 END AS is_success
    FROM eduanalytics.v_student_analytics_master
    WHERE final_result IS NOT NULL
)
SELECT
    'Module Comparison' AS analysis_type,
    code_module,
    COUNT(*) AS total_students,
    ROUND(AVG(total_clicks), 2) AS avg_clicks,
    ROUND(AVG(average_score), 2) AS avg_score,
    ROUND(SUM(is_success) * 100.0 / COUNT(*), 2) AS success_rate,
    ROUND(AVG(unique_resources_accessed), 2) AS avg_resources_used
FROM module_rows
GROUP BY code_module
ORDER BY success_rate DESC;


In [0]:
-- Query 5: Early Warning Indicators
-- Row-level listing of every row that has both a final_result and an
-- average_score, labelled with a risk level and sorted highest risk first,
-- then by ascending average_score.
--
-- Risk rules (evaluated in order, first match wins):
--   High Risk   : total_clicks < 100 AND average_score < 40
--   Medium Risk : total_clicks < 300 OR  average_score < 50
--   Low Risk    : everything else
WITH ranked_rows AS (
    SELECT
        code_module,
        code_presentation,
        id_student,
        gender,
        age_band,
        final_result,
        total_clicks,
        average_score,
        total_assessments_submitted,
        -- Numeric rank of the risk rule that matched; drives both label and sort.
        CASE
            WHEN total_clicks < 100 AND average_score < 40 THEN 1
            WHEN total_clicks < 300 OR average_score < 50 THEN 2
            ELSE 3
        END AS risk_rank
    FROM eduanalytics.v_student_analytics_master
    WHERE final_result IS NOT NULL
      AND average_score IS NOT NULL
)
SELECT
    'Early Warning Analysis' AS analysis_type,
    code_module,
    code_presentation,
    id_student,
    gender,
    age_band,
    final_result,
    total_clicks,
    average_score,
    total_assessments_submitted,
    CASE risk_rank
        WHEN 1 THEN 'High Risk'
        WHEN 2 THEN 'Medium Risk'
        ELSE 'Low Risk'
    END AS risk_level
FROM ranked_rows
ORDER BY
    risk_rank,
    average_score ASC;


In [0]:
-- Query 6: Assessment Type Performance Analysis (Fixed)
-- Compares the three per-type score columns of the master view
-- (avg_tma_score, avg_cma_score, avg_exam_score).
WITH scores_by_type AS (
    -- Stack the three columns into (assessment_type, score) rows, keeping only
    -- rows where that type's score is present. UNION ALL: the branches carry
    -- different type labels, so there is nothing to de-duplicate.
    SELECT 'TMA' AS assessment_type, avg_tma_score AS score
    FROM eduanalytics.v_student_analytics_master
    WHERE avg_tma_score IS NOT NULL

    UNION ALL

    SELECT 'CMA' AS assessment_type, avg_cma_score AS score
    FROM eduanalytics.v_student_analytics_master
    WHERE avg_cma_score IS NOT NULL

    UNION ALL

    SELECT 'Exam' AS assessment_type, avg_exam_score AS score
    FROM eduanalytics.v_student_analytics_master
    WHERE avg_exam_score IS NOT NULL
)
SELECT
    'Assessment Type Analysis' AS analysis_type,
    assessment_type,
    -- Counts stacked rows (one per master-view row with a score of this type).
    COUNT(*) AS students_with_scores,
    ROUND(AVG(score), 2) AS average_score,
    ROUND(MIN(score), 2) AS min_score,
    ROUND(MAX(score), 2) AS max_score,
    -- NOTE(review): STDDEV is sample vs population depending on the engine - confirm.
    ROUND(STDDEV(score), 2) AS score_std_dev
FROM scores_by_type
GROUP BY assessment_type
ORDER BY average_score DESC;



In [0]:
-- Query 7: Temporal Engagement Patterns (Fixed date calculation)
-- Buckets rows by how many days after the baseline date their first interaction
-- happened, then reports count, mean clicks, mean score and success rate per
-- (code_module, code_presentation, bucket).
--
-- NOTE(review): the baseline '2013-02-01' is applied to every code_presentation.
-- That is only meaningful if first_interaction_date in the view is expressed
-- relative to that same date for all presentations - confirm against the view.
WITH first_touch AS (
    SELECT
        code_module,
        code_presentation,
        total_clicks,
        average_score,
        final_result,
        -- DATE_ADD(..., 0) adds zero days, i.e. it only supplies the baseline date.
        DATEDIFF(first_interaction_date, DATE_ADD('2013-02-01', 0)) AS days_after_baseline
    FROM eduanalytics.v_student_analytics_master
    WHERE first_interaction_date IS NOT NULL
      AND final_result IS NOT NULL
),
bucketed AS (
    SELECT
        code_module,
        code_presentation,
        total_clicks,
        average_score,
        final_result,
        -- First matching threshold wins; zero or negative offsets land in Week 1.
        CASE
            WHEN days_after_baseline <= 7 THEN 'Early Starter (Week 1)'
            WHEN days_after_baseline <= 30 THEN 'Regular Starter (Month 1)'
            WHEN days_after_baseline <= 60 THEN 'Late Starter (Month 2)'
            ELSE 'Very Late Starter'
        END AS engagement_timing
    FROM first_touch
)
SELECT
    'Temporal Patterns' AS analysis_type,
    code_module,
    code_presentation,
    engagement_timing,
    COUNT(*) AS student_count,
    ROUND(AVG(total_clicks), 2) AS avg_total_clicks,
    ROUND(AVG(average_score), 2) AS avg_score,
    ROUND(SUM(CASE WHEN final_result IN ('Pass', 'Distinction') THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS success_rate
FROM bucketed
GROUP BY
    code_module,
    code_presentation,
    engagement_timing
ORDER BY
    code_module,
    code_presentation,
    success_rate DESC;

In [0]:
-- Query 8: Correlation Analysis - Key Metrics
-- Correlation (CORR) of average_score with three engagement metrics, one output
-- row per metric pair. Each pair uses only rows that have a score and a strictly
-- positive value for the metric; sample_size is the number of such rows.
WITH scored_rows AS (
    -- Shared precondition of all three pairs: a score must be present.
    SELECT
        total_clicks,
        unique_resources_accessed,
        engagement_span_days,
        average_score
    FROM eduanalytics.v_student_analytics_master
    WHERE average_score IS NOT NULL
)
SELECT
    'Correlation Analysis' AS analysis_type,
    'Clicks vs Score' AS metric_pair,
    ROUND(CORR(total_clicks, average_score), 3) AS correlation_coefficient,
    COUNT(*) AS sample_size
FROM scored_rows
WHERE total_clicks > 0

UNION ALL

SELECT
    'Correlation Analysis' AS analysis_type,
    'Resources vs Score' AS metric_pair,
    ROUND(CORR(unique_resources_accessed, average_score), 3) AS correlation_coefficient,
    COUNT(*) AS sample_size
FROM scored_rows
WHERE unique_resources_accessed > 0

UNION ALL

SELECT
    'Correlation Analysis' AS analysis_type,
    'Engagement Span vs Score' AS metric_pair,
    ROUND(CORR(engagement_span_days, average_score), 3) AS correlation_coefficient,
    COUNT(*) AS sample_size
FROM scored_rows
WHERE engagement_span_days > 0;

In [0]:
-- Query 9: Detailed Performance Breakdown by Education Level
-- Per highest_education: row count, mean clicks / score / resources, and the
-- percentage of rows ending in 'Pass', 'Distinction' and 'Withdrawn'
-- respectively. Rows without a final_result are excluded.
SELECT
    'Education Level Analysis' AS analysis_type,
    highest_education,
    COUNT(*) AS student_count,
    ROUND(AVG(total_clicks), 2) AS avg_clicks,
    ROUND(AVG(average_score), 2) AS avg_score,
    ROUND(AVG(unique_resources_accessed), 2) AS avg_resources,
    -- COUNT over a CASE without ELSE counts only the rows matching the condition.
    ROUND(COUNT(CASE WHEN final_result = 'Pass' THEN 1 END) * 100.0 / COUNT(*), 2) AS pass_rate,
    ROUND(COUNT(CASE WHEN final_result = 'Distinction' THEN 1 END) * 100.0 / COUNT(*), 2) AS distinction_rate,
    ROUND(COUNT(CASE WHEN final_result = 'Withdrawn' THEN 1 END) * 100.0 / COUNT(*), 2) AS withdrawal_rate
FROM eduanalytics.v_student_analytics_master
WHERE final_result IS NOT NULL
GROUP BY highest_education
ORDER BY avg_score DESC;


In [0]:
-- Query 10: Additional Analysis - Previous Attempts Impact
-- Per num_of_prev_attempts: row count, mean clicks, mean score, success rate
-- ('Pass' or 'Distinction') and withdrawal rate ('Withdrawn'), as percentages.
-- Rows without a final_result are excluded.
WITH attempt_rows AS (
    SELECT
        num_of_prev_attempts,
        total_clicks,
        average_score,
        -- 0/1 outcome flags, summed below to count matching rows.
        CASE WHEN final_result IN ('Pass', 'Distinction') THEN 1 ELSE 0 END AS is_success,
        CASE WHEN final_result = 'Withdrawn' THEN 1 ELSE 0 END AS is_withdrawn
    FROM eduanalytics.v_student_analytics_master
    WHERE final_result IS NOT NULL
)
SELECT
    'Previous Attempts Analysis' AS analysis_type,
    num_of_prev_attempts,
    COUNT(*) AS student_count,
    ROUND(AVG(total_clicks), 2) AS avg_clicks,
    ROUND(AVG(average_score), 2) AS avg_score,
    ROUND(SUM(is_success) * 100.0 / COUNT(*), 2) AS success_rate,
    ROUND(SUM(is_withdrawn) * 100.0 / COUNT(*), 2) AS withdrawal_rate
FROM attempt_rows
GROUP BY num_of_prev_attempts
ORDER BY num_of_prev_attempts;

In [0]:
-- Query 11: Disability Support Analysis
-- Per disability value: row count, mean clicks, mean score, success rate
-- (percentage of rows with final_result 'Pass' or 'Distinction') and mean
-- number of distinct resources accessed. Rows without a final_result are excluded.
SELECT
    'Disability Analysis' AS analysis_type,
    disability,
    COUNT(*) AS student_count,
    ROUND(AVG(total_clicks), 2) AS avg_clicks,
    ROUND(AVG(average_score), 2) AS avg_score,
    -- COUNT over a CASE without ELSE counts only the successful rows.
    ROUND(
        COUNT(CASE WHEN final_result IN ('Pass', 'Distinction') THEN 1 END) * 100.0 / COUNT(*),
        2
    ) AS success_rate,
    ROUND(AVG(unique_resources_accessed), 2) AS avg_resources_used
FROM eduanalytics.v_student_analytics_master
WHERE final_result IS NOT NULL
GROUP BY disability
ORDER BY success_rate DESC;

In [0]:
-- Query 12: Summary Dashboard View
-- Single-row summary over every row of v_student_analytics_master that has a
-- final_result. All *_percentage / *_rate columns are percentages of that row
-- count, rounded to 2 decimals.
CREATE OR REPLACE VIEW eduanalytics.v_dashboard_summary AS
SELECT 
    -- Overall metrics
    COUNT(*) AS total_students,
    COUNT(DISTINCT code_module) AS total_modules,
    -- A presentation is identified by the (module, presentation code) pair.
    COUNT(DISTINCT CONCAT(code_module, '_', code_presentation)) AS total_presentations,
    
    -- Performance metrics
    ROUND(AVG(average_score), 2) AS overall_avg_score,
    ROUND(SUM(CASE WHEN final_result IN ('Pass', 'Distinction') THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS overall_success_rate,
    
    -- Engagement metrics
    ROUND(AVG(total_clicks), 2) AS avg_total_clicks,
    ROUND(AVG(unique_resources_accessed), 2) AS avg_resources_accessed,
    
    -- Risk distribution
    -- Uses the same first-match-wins rule order as the row-level risk_level CASE
    -- in this file (High, then Medium, else Low). A row that satisfies the High
    -- Risk rule also satisfies "clicks < 300 OR score < 50", so it must be
    -- excluded from Medium explicitly; otherwise it is counted in both
    -- percentages and the two no longer partition the population.
    ROUND(SUM(CASE WHEN total_clicks < 100 AND average_score < 40 THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS high_risk_percentage,
    ROUND(
        SUM(
            CASE
                WHEN total_clicks < 100 AND average_score < 40 THEN 0  -- High Risk, counted above
                WHEN total_clicks < 300 OR average_score < 50 THEN 1
                ELSE 0
            END
        ) * 100.0 / COUNT(*),
        2
    ) AS medium_risk_percentage,
    
    -- Demographics
    ROUND(SUM(CASE WHEN gender = 'F' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS female_percentage,
    ROUND(SUM(CASE WHEN disability = 'Y' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS disability_percentage,
    
    -- Engagement levels
    ROUND(SUM(CASE WHEN engagement_level = 'High' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS high_engagement_percentage,
    ROUND(SUM(CASE WHEN engagement_level = 'No Engagement' THEN 1 ELSE 0 END) * 100.0 / COUNT(*), 2) AS no_engagement_percentage
    
FROM eduanalytics.v_student_analytics_master
WHERE final_result IS NOT NULL;

-- Display the dashboard summary
SELECT * FROM eduanalytics.v_dashboard_summary;


In [0]:
-- Final data export queries for visualization tools
-- Export 1: Student performance data for dashboards
-- Row-level extract of every row with a final_result, plus a derived risk_level,
-- sorted by module, presentation and descending average_score.
--
-- NOTE(review): average_score is not filtered here. If it can be NULL, the
-- comparisons on it evaluate to unknown, so such rows are labelled from
-- total_clicks alone ('Medium Risk' below 300 clicks, otherwise 'Low Risk') -
-- confirm this is the intended treatment of rows without a score.
WITH export_rows AS (
    SELECT
        code_module,
        code_presentation,
        final_result,
        gender,
        age_band,
        highest_education,
        disability,
        total_clicks,
        average_score,
        engagement_level,
        unique_resources_accessed,
        total_assessments_submitted
    FROM eduanalytics.v_student_analytics_master
    WHERE final_result IS NOT NULL
)
SELECT
    code_module,
    code_presentation,
    final_result,
    gender,
    age_band,
    highest_education,
    disability,
    total_clicks,
    average_score,
    engagement_level,
    unique_resources_accessed,
    total_assessments_submitted,
    -- First matching rule wins: High, then Medium, else Low.
    CASE
        WHEN total_clicks < 100 AND average_score < 40 THEN 'High Risk'
        WHEN total_clicks < 300 OR average_score < 50 THEN 'Medium Risk'
        ELSE 'Low Risk'
    END AS risk_level
FROM export_rows
ORDER BY
    code_module,
    code_presentation,
    average_score DESC;

In [0]:
-- Export 2: Module-level summary for reporting
-- One row per (code_module, code_presentation): row count, mean clicks, mean
-- score, success and withdrawal rates (percentages), mean resources used, and
-- how many rows sit in the 'High' and 'No Engagement' engagement levels.
-- Rows without a final_result are excluded.
SELECT
    code_module,
    code_presentation,
    COUNT(*) AS total_students,
    ROUND(AVG(total_clicks), 2) AS avg_clicks,
    ROUND(AVG(average_score), 2) AS avg_score,
    ROUND(
        COUNT(CASE WHEN final_result IN ('Pass', 'Distinction') THEN 1 END) * 100.0 / COUNT(*),
        2
    ) AS success_rate,
    ROUND(
        COUNT(CASE WHEN final_result = 'Withdrawn' THEN 1 END) * 100.0 / COUNT(*),
        2
    ) AS withdrawal_rate,
    ROUND(AVG(unique_resources_accessed), 2) AS avg_resources_used,
    -- Plain head-counts (not percentages) per engagement level.
    SUM(CASE WHEN engagement_level = 'High' THEN 1 ELSE 0 END) AS high_engagement_students,
    SUM(CASE WHEN engagement_level = 'No Engagement' THEN 1 ELSE 0 END) AS no_engagement_students
FROM eduanalytics.v_student_analytics_master
WHERE final_result IS NOT NULL
GROUP BY
    code_module,
    code_presentation
ORDER BY
    code_module,
    code_presentation;

In [0]:
-- Export 3: Time-series data for trend analysis (if needed)
-- Per (code_module, code_presentation, week of first interaction): how many rows
-- started engaging that week and their mean total_clicks / average_score
-- (unrounded). Rows lacking a first_interaction_date or final_result are excluded.
WITH weekly_starts AS (
    SELECT
        code_module,
        code_presentation,
        -- Truncate the first interaction to the start of its week.
        DATE_TRUNC('week', first_interaction_date) AS week_start,
        total_clicks,
        average_score
    FROM eduanalytics.v_student_analytics_master
    WHERE first_interaction_date IS NOT NULL
      AND final_result IS NOT NULL
)
SELECT
    code_module,
    code_presentation,
    week_start,
    COUNT(*) AS students_starting_engagement,
    AVG(total_clicks) AS avg_eventual_clicks,
    AVG(average_score) AS avg_eventual_score
FROM weekly_starts
GROUP BY
    code_module,
    code_presentation,
    week_start
ORDER BY
    code_module,
    code_presentation,
    week_start;