In [4]:
#!/usr/bin/env python3
"""
Student Performance Dashboard (SVG-in-HTML, headless matplotlib)

Tracks per-student:
- Module completion % (weighted/unweighted)
- Login streaks (current + longest, HackerRank-style)
- Hours spent (last 7/30 days, total)
- Cumulative modules completed over time
- Login calendar heatmap (last 12 weeks)

Input CSVs (customize paths as needed):
- students.csv:            student_id, name, cohort, program   [optional]
- student_progress.csv:    student_id, module_id, module_title, status, module_weight, updated_at
- student_logins.csv:      student_id, login_time
- student_time.csv:        student_id, date, minutes_spent

Output:
- dashboard_stu.html (self-contained; path set by OUTPUT_HTML)
"""

import os
import io
import numpy as np
import pandas as pd
import matplotlib
matplotlib.use('Agg')  # ensure headless
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter
from datetime import datetime, date, timedelta

# ----------- Config -----------
# Which student to report on, and where the inputs/outputs live.
TARGET_STUDENT_ID = 'S123'   # <-- CHANGE to target student_id
STUDENTS_CSV = 'students.csv'   # optional
PROGRESS_CSV = 'student_progress.csv'
LOGINS_CSV = 'student_logins.csv'
TIME_CSV = 'student_time.csv'
OUTPUT_HTML = 'dashboard_stu.html'

# Named hex colours shared by the charts and the page stylesheet.
PALETTE = dict(
    blue='#1f3b73',
    green='#34d399',
    orange='#f59e0b',
    red='#d9534f',
    indigo='#6366f1',
    gray='#6b778c',
    light='#f2f4f8',
)

# ----------- Helpers -----------
def read_csv_safe(path, dtype=None, parse_dates=None):
    """Best-effort CSV loader.

    Returns the parsed DataFrame, or an empty DataFrame when ``path`` does
    not exist or ``pd.read_csv`` raises. A read failure is reported with a
    printed warning rather than propagated.
    """
    if os.path.exists(path):
        try:
            frame = pd.read_csv(path, dtype=dtype, parse_dates=parse_dates)
        except Exception as e:
            print(f"Warning: failed reading {path}: {e}")
        else:
            return frame
    # Missing file or failed read: callers test `.empty` on the result.
    return pd.DataFrame()

def normalize_date(series):
    """Convert a series of datetime/date strings to date-only values.

    Entries that cannot be parsed are coerced to NaT instead of raising.
    """
    return pd.to_datetime(series, errors='coerce').dt.date

def fig_to_svg(fig):
    """Render ``fig`` to an SVG string suitable for inlining in HTML.

    The figure is always closed, even if rendering fails, so a failed chart
    does not leave an open figure behind. Any prolog that precedes the
    ``<svg`` root tag (XML declaration, DOCTYPE) is dropped, because neither
    belongs inside an HTML document body.

    Returns the SVG markup starting at ``<svg``; if no ``<svg`` tag is found
    the rendered text is returned unchanged.
    """
    buf = io.StringIO()
    try:
        fig.savefig(buf, format='svg', bbox_inches='tight')
    finally:
        plt.close(fig)
    svg = buf.getvalue()
    root_at = svg.find('<svg')
    return svg[root_at:] if root_at != -1 else svg

def compute_streak(login_dates):
    """Compute login streak statistics from a collection of login dates.

    Args:
        login_dates: any iterable of ``datetime.date`` objects (a list, set,
            generator, ...). Duplicates are ignored.

    Returns:
        A tuple ``(current_streak_days, longest_streak_days, last_login_date)``.
        The current streak is the run of consecutive days that ends at the
        most recent login date (not necessarily today). With no logins the
        result is ``(0, 0, None)``.
    """
    days = sorted(set(login_dates))
    if not days:
        return 0, 0, None

    longest = run = 1
    for prev, cur in zip(days, days[1:]):
        if (cur - prev).days == 1:
            run += 1
            longest = max(longest, run)
        else:
            run = 1

    # After the single pass, `run` is the length of the final consecutive
    # run, i.e. the streak that ends at the last login date.
    return run, longest, days[-1]

def pct(a, b):
    """Return the fraction ``a / b``, or 0.0 when ``b`` is not positive."""
    if b > 0:
        return a / b
    return 0.0

# ----------- Load data -----------
def _rows_for_target(frame):
    """Keep only TARGET_STUDENT_ID's rows; an empty frame is passed through."""
    if frame.empty:
        return frame
    is_target = frame['student_id'].astype(str) == str(TARGET_STUDENT_ID)
    return frame[is_target].copy()


# The students table is kept whole here; it is narrowed when the student's
# metadata is looked up.
students = read_csv_safe(STUDENTS_CSV)

# Activity tables are restricted to the target student as they are loaded.
progress = _rows_for_target(read_csv_safe(PROGRESS_CSV))
logins = _rows_for_target(read_csv_safe(LOGINS_CSV))
times = _rows_for_target(read_csv_safe(TIME_CSV))

# Student meta
def _meta_text(row, key, default=""):
    """Return ``row[key]`` as stripped text, or ``default`` if absent/NaN/blank.

    A blank CSV cell is read by pandas as NaN, and ``str(NaN)`` is the truthy
    string ``'nan'``; checking for NaN first keeps that out of the page header.
    """
    value = row.get(key, default)
    if pd.isna(value):
        return default
    return str(value).strip() or default


# Defaults used when students.csv is missing or has no row for this student.
student_row = None
student_name = f"Student {TARGET_STUDENT_ID}"
student_program = ""
student_cohort = ""
if not students.empty:
    srow = students[students['student_id'].astype(str) == str(TARGET_STUDENT_ID)]
    if not srow.empty:
        # If the id appears more than once, the first matching row wins.
        student_row = srow.iloc[0]
        student_name = _meta_text(student_row, 'name', student_name)
        student_program = _meta_text(student_row, 'program')
        student_cohort = _meta_text(student_row, 'cohort')

# ----------- Prepare metrics -----------
# Progress: completion %
# Start from the "no progress rows" values and overwrite them when data exists.
completion_pct = 0.0
completed_count = 0
total_modules = 0
comp_timeline = pd.DataFrame(columns=['updated_at'])

if not progress.empty:
    # Status is compared case-insensitively and ignoring surrounding spaces.
    progress['status'] = progress['status'].astype(str).str.strip().str.lower()

    # Missing column -> every module weighs 1; unparseable weights also become 1.
    if 'module_weight' not in progress.columns:
        progress['module_weight'] = 1.0
    else:
        progress['module_weight'] = pd.to_numeric(progress['module_weight'], errors='coerce').fillna(1.0)

    comp_mask = progress['status'] == 'completed'
    total_modules = len(progress)
    completed_count = comp_mask.sum()

    total_weight = progress['module_weight'].sum()
    completed_weight = progress.loc[comp_mask, 'module_weight'].sum()
    completion_pct = pct(completed_weight, total_weight)

    # Completion timeline: timestamps of completed modules, oldest first.
    if 'updated_at' in progress.columns:
        progress['updated_at'] = pd.to_datetime(progress['updated_at'], errors='coerce')
        comp_timeline = progress.loc[comp_mask, ['updated_at']].dropna().sort_values('updated_at')

# Logins: streaks + calendar
# Values for a student with no login rows; replaced below when rows exist.
login_days_set = set()
current_streak, longest_streak, last_login_date = 0, 0, None

if not logins.empty:
    logins['login_time'] = pd.to_datetime(logins['login_time'], errors='coerce')
    # Reduce timestamps to unique calendar days, discarding unparseable ones.
    login_days = normalize_date(logins['login_time']).dropna()
    login_days_set = set(login_days)
    current_streak, longest_streak, last_login_date = compute_streak(login_days_set)

# Time spent: hours
# The reporting windows are needed whether or not any time rows exist (the
# charts further down also use `today`), so they are computed once up front.
today = date.today()
d7 = today - timedelta(days=6)     # 7-day window, inclusive of today
d30 = today - timedelta(days=29)   # 30-day window, inclusive of today
day_range = pd.date_range(d30, today, freq='D').date

if not times.empty:
    parsed_dates = pd.to_datetime(times['date'], errors='coerce')
    times['date'] = parsed_dates.dt.date
    times['minutes_spent'] = pd.to_numeric(times['minutes_spent'], errors='coerce').fillna(0)

    # The all-time total counts every row, including rows with a bad date.
    total_minutes = int(times['minutes_spent'].sum())
    total_hours = total_minutes / 60.0

    # Rows whose date failed to parse hold NaT, and ordering comparisons
    # between NaT and a datetime.date can raise TypeError. Exclude them from
    # the date-windowed statistics instead of letting one bad row abort the run.
    dated = times.loc[parsed_dates.notna()]

    # Last 7/30 days
    in_last7 = (dated['date'] >= d7) & (dated['date'] <= today)
    in_last30 = (dated['date'] >= d30) & (dated['date'] <= today)
    last7 = int(dated.loc[in_last7, 'minutes_spent'].sum())
    last30 = int(dated.loc[in_last30, 'minutes_spent'].sum())
    last7_hours = last7 / 60.0
    last30_hours = last30 / 60.0

    # Daily minutes (last 30 days) for the bar chart; days without any
    # recorded time appear with 0 minutes.
    daily = pd.DataFrame({'date': day_range})
    agg = dated.groupby('date', as_index=False)['minutes_spent'].sum()
    daily = daily.merge(agg, on='date', how='left').fillna({'minutes_spent': 0})
else:
    total_minutes = 0
    total_hours = 0.0
    last7_hours = 0.0
    last30_hours = 0.0
    daily = pd.DataFrame({'date': day_range, 'minutes_spent': 0})

# ----------- Charts (inline SVG) -----------
# Each chart is rendered to an SVG string; start from empty placeholders.
svg_completion = svg_calendar = svg_daily_minutes = svg_cumulative_modules = ""

# Donut chart for completion %
completed = completion_pct
remaining = max(0.0, 1.0 - completed)
fig, donut_ax = plt.subplots(figsize=(3.6, 3.6))
# A wedge width below 1 leaves the centre hollow, turning the pie into a donut.
wedges, texts = donut_ax.pie(
    [completed, remaining],
    startangle=90,
    colors=[PALETTE['green'], '#e5e7ef'],
    wedgeprops={'width': 0.35, 'edgecolor': 'white'},
)
donut_ax.legend(
    wedges,
    [f"Completed {completed*100:.1f}%", f"Remaining {remaining*100:.1f}%"],
    loc='lower center',
    bbox_to_anchor=(0.5, -0.05),
    ncol=2,
    fontsize=9,
)
donut_ax.set_title('Module Completion', color=PALETTE['blue'], fontsize=12, pad=6)
svg_completion = fig_to_svg(fig)

# Login calendar heatmap (last 12 weeks)
# The grid covers the 84 days ending today: one column per 7-day block, one
# row per day within the block.
weeks = 12
start_day = today - timedelta(days=weeks*7 - 1)
cal_days = [start_day + timedelta(days=i) for i in range(weeks*7)]
arr = np.array([1 if d in login_days_set else 0 for d in cal_days], dtype=int).reshape(weeks, 7)
fig = plt.figure(figsize=(7, 2.6))
# Pin the colour scale to 0..1. With automatic scaling a grid that is all 1s
# (a login every day) has vmin == vmax and is drawn in the lightest colour,
# making a perfect record look like no logins at all.
plt.imshow(arr.T, aspect='auto', cmap='Greens', interpolation='nearest', vmin=0, vmax=1)
# Row 0 is whatever weekday start_day falls on, which is only Monday when
# today is a Sunday, so derive the row labels from start_day instead of
# assuming a Monday start.
weekday_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
row_labels = [weekday_names[(start_day.weekday() + i) % 7] for i in range(7)]
plt.yticks(range(7), row_labels, fontsize=8)
# Each column is labelled with the first date of its 7-day block.
week_labels = [(start_day + timedelta(days=i*7)).strftime('%b %d') for i in range(weeks)]
plt.xticks(range(weeks), week_labels, rotation=45, ha='right', fontsize=8)
plt.title('Login Calendar (last 12 weeks)', color=PALETTE['blue'], fontsize=12, pad=6)
plt.tight_layout()
svg_calendar = fig_to_svg(fig)

# Daily minutes (last 30 days)
# One bar per calendar day, labelled like "Jan 05".
x = [day.strftime('%b %d') for day in pd.to_datetime(daily['date']).dt.date]
y = daily['minutes_spent'].values
fig, minutes_ax = plt.subplots(figsize=(7, 2.8))
minutes_ax.bar(x, y, color=PALETTE['indigo'], alpha=0.85)
plt.xticks(rotation=45, ha='right', fontsize=8)
minutes_ax.set_ylabel('Minutes', fontsize=9)
minutes_ax.set_title('Daily Time Spent (last 30 days)', color=PALETTE['blue'], fontsize=12, pad=6)
minutes_ax.grid(True, axis='y', linestyle='--', alpha=0.3)
fig.tight_layout()
svg_daily_minutes = fig_to_svg(fig)

# Cumulative modules completed over time
fig = plt.figure(figsize=(7, 2.8))
if not comp_timeline.empty:
    comp_timeline = comp_timeline.dropna().sort_values('updated_at')
    # Plot real dates rather than "Mon DD" strings. String labels form a
    # categorical axis, where identical labels (e.g. the same month/day in two
    # different years) share one x position and the spacing between points
    # ignores elapsed time. Date objects give a true, ordered time axis.
    xdates = comp_timeline['updated_at'].dt.date.tolist()
    steps = np.arange(1, len(xdates) + 1)
    plt.step(xdates, steps, where='post', color=PALETTE['green'])
    plt.xticks(rotation=45, ha='right', fontsize=8)
    plt.ylabel('Completed modules', fontsize=9)
    plt.title('Cumulative Modules Completed', color=PALETTE['blue'], fontsize=12, pad=6)
    plt.grid(True, axis='y', linestyle='--', alpha=0.3)
    plt.tight_layout()
else:
    # No completed module with a usable timestamp: show a placeholder message.
    plt.text(0.5, 0.5, 'No completion timeline available', ha='center', va='center')
    plt.axis('off')
svg_cumulative_modules = fig_to_svg(fig)

# ----------- HTML ----------
# Generation timestamp shown in the page header.
now_str = f"{datetime.now():%Y-%m-%d %H:%M}"
# Last login as YYYY-MM-DD, or an em dash when there is no login on record.
if last_login_date:
    last_login_str = last_login_date.strftime('%Y-%m-%d')
else:
    last_login_str = '—'

def kpi_card(label, value, subtitle=None):
    """Build the HTML fragment for one KPI tile.

    The tile shows ``label`` and ``value``; a ``kpi-sub`` line is added only
    when ``subtitle`` is truthy. Arguments are interpolated as given.
    """
    if subtitle:
        sub = f"<div class='kpi-sub'>{subtitle}</div>"
    else:
        sub = ""
    rows = (
        "",
        "    <div class='kpi'>",
        f"      <div class='kpi-label'>{label}</div>",
        f"      <div class='kpi-value'>{value}</div>",
        f"      {sub}",
        "    </div>",
        "    ",
    )
    return "\n".join(rows)

# KPI tiles in display order: (label, value, optional subtitle).
kpi_specs = [
    ('Completion', f"{completion_pct*100:.1f}%", f"{completed_count}/{total_modules} modules"),
    ('Current Streak', f"{current_streak} days", f"Last login: {last_login_str}"),
    ('Longest Streak', f"{longest_streak} days", None),
    ('Hours (7d)', f"{last7_hours:.1f} h", None),
    ('Hours (30d)', f"{last30_hours:.1f} h", None),
    ('Total Hours', f"{total_hours:.1f} h", None),
]
kpi_html = "".join(kpi_card(*spec) for spec in kpi_specs)

# Student name/program/cohort come straight from students.csv, so escape them
# before placing them in markup; characters such as '<' or '&' would otherwise
# corrupt the page (or inject markup). Imported under an alias because this
# script binds the name `html` to the page text just below.
from html import escape as _html_escape

# Full page: the stylesheet uses doubled braces because this is an f-string.
html = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset='utf-8'>
<meta name='viewport' content='width=device-width, initial-scale=1'>
<title>Student Performance Dashboard</title>
<style>
  body {{ font-family: Arial, Helvetica, sans-serif; margin: 0; background:#f7f8fb; color:#222; }}
  header {{ background:{PALETTE['blue']}; color:#fff; padding: 16px 24px; }}
  header h1 {{ margin:0; font-size: 20px; }}
  header .sub {{ opacity:0.85; font-size: 12px; }}
  .container {{ padding: 20px 24px; }}
  .kpis {{ display:grid; grid-template-columns: repeat(auto-fit, minmax(220px, 1fr)); grid-gap: 16px; margin-bottom: 20px; }}
  .kpi {{ background:#fff; border: 1px solid #e5e7ef; border-radius: 12px; padding: 18px 20px; box-shadow: 0 1px 2px rgba(0,0,0,0.04); }}
  .kpi-label {{ font-size:13px; color:{PALETTE['gray']}; text-transform:uppercase; letter-spacing:.4px; }}
  .kpi-value {{ font-size:28px; font-weight:700; margin-top:2px; color:{PALETTE['blue']}; }}
  .kpi-sub {{ font-size:12px; color:#667085; margin-top:6px; }}
  .section-title {{ font-size:16px; font-weight:700; margin: 18px 0 8px; color:{PALETTE['blue']}; }}
  .card {{ background:#fff; border:1px solid #e5e7ef; border-radius:10px; padding:14px; margin-bottom:16px; box-shadow:0 1px 2px rgba(0,0,0,0.04); }}
  .grid-2 {{ display:grid; grid-template-columns: 1fr 1fr; grid-gap:12px; }}
  .grid-3 {{ display:grid; grid-template-columns: 1fr 1fr 1fr; grid-gap:12px; }}
  @media (max-width:900px) {{ .grid-2 {{ grid-template-columns: 1fr; }} .grid-3 {{ grid-template-columns: 1fr; }} }}
  .chart svg {{ width:100%; height:auto; border-radius:8px; border:1px solid #eee; }}
  .footer {{ font-size:12px; color:#667085; margin-top:24px; }}
</style>
</head>
<body>
<header>
  <h1>Student Performance Dashboard</h1>
  <div class='sub'>{_html_escape(student_name)} • Program: {_html_escape(student_program or '—')} • Cohort: {_html_escape(student_cohort or '—')} • Generated {now_str}</div>
</header>
<div class='container'>

  <div class='kpis'>
    {kpi_html}
  </div>

  <div class='grid-3'>
    <div class='card chart'>
      <div class='section-title'>Module Completion</div>
      {svg_completion}
    </div>
    <div class='card chart'>
      <div class='section-title'>Login Calendar (last 12 weeks)</div>
      {svg_calendar}
    </div>
    <div class='card'>
      <div class='section-title'>Streak Notes</div>
      <div style='font-size:13px;color:#444;line-height:1.5'>
        Current streak counts <b>consecutive login days</b> ending at the last login date.
        Longest streak is the <b>maximum consecutive days</b> observed.
        To maintain a streak, login daily without breaks.
      </div>
    </div>
  </div>

  <div class='grid-2'>
    <div class='card chart'>
      <div class='section-title'>Daily Time Spent (last 30 days)</div>
      {svg_daily_minutes}
    </div>
    <div class='card chart'>
      <div class='section-title'>Cumulative Modules Completed</div>
      {svg_cumulative_modules}
    </div>
  </div>

  <div class='footer'>
    Definitions: Completion % = sum(module weights for completed) / sum(all module weights).
    Hours = minutes / 60. Streaks computed on date-only logins (unique days).
  </div>
</div>
</body>
</html>
"""

# Write the self-contained page as UTF-8 (it contains non-ASCII characters
# such as the bullet separators and em dashes).
with open(OUTPUT_HTML, 'w', encoding='utf-8') as f:
    f.write(html)

print(f"Saved {OUTPUT_HTML} for student_id={TARGET_STUDENT_ID}")


Matplotlib is building the font cache; this may take a moment.


Saved dashboard_stu.html for student_id=S123
