# ESS electives report

In [None]:
import pandas as pd
import numpy as np
import re
from pathlib import Path


## Load data

In [None]:
# The three CSV inputs sit side by side in the local 'data' directory.
data_path = Path('data')
students_file, courses_file, choices_file = (
    data_path / csv_name
    for csv_name in ('students.csv', 'courses.csv', 'choices.csv')
)

# Read each table into its own DataFrame using pandas' default CSV settings.
students = pd.read_csv(students_file)
courses = pd.read_csv(courses_file)
choices = pd.read_csv(choices_file)


## Processing

In [None]:
# Lookup helper for columns / labels that may be absent
def safe_get(df_or_series, key, default=None):
    """Look up ``key`` on a DataFrame or Series, falling back to ``default``.

    Objects with a ``columns`` attribute are searched by column label only;
    otherwise objects with an ``index`` attribute are searched by index label.
    ``default`` is returned when ``key`` is None, when the label is missing,
    when the object exposes neither attribute, or when the lookup raises.
    """
    if key is None:
        return default
    try:
        # 'columns' takes precedence: once an axis attribute is found, the
        # answer comes from that axis alone and the other is never consulted.
        for axis_name in ('columns', 'index'):
            if not hasattr(df_or_series, axis_name):
                continue
            labels = getattr(df_or_series, axis_name)
            if key in labels:
                return df_or_series[key]
            return default
    except Exception:
        # Best-effort accessor: any failure (e.g. an unhashable key or a
        # non-container 'index' attribute) degrades to the default.
        return default
    return default

# Resolve the course-code -> course-name lookup.
# Precedence: a non-None global `course_map_v2`, then an existing global
# `course_map`; both are read before `course_map` is rebound.
course_map = next(
    (candidate
     for candidate in (globals().get('course_map_v2'), globals().get('course_map'))
     if candidate is not None),
    None,
)

# Nothing supplied from elsewhere: derive the mapping from the courses table.
if course_map is None:
    course_map = {
        code: name
        for code, name in zip(courses['Course Code'], courses['Course Name'])
    }

# Work on a copy of the choices table and attach the course name for each code
# (codes missing from course_map come out of .map() as NaN).
choices = choices.copy()
choices['Course Name'] = choices['Course Code'].map(course_map)

# Optional 'Enrol Again' column: take the first string column label that
# contains "enrol again" (case-insensitive, whitespace between the two words
# optional). When no label matches, the name stays None.
for c in choices.columns:
    if not isinstance(c, str):
        continue
    if re.search(r'enrol\s*again', c, flags=re.IGNORECASE) is not None:
        enrol_again_col = c
        break
else:
    enrol_again_col = None

# Expose the detected column under the fixed name 'Enrol Again'; when nothing
# was detected, safe_get hands back the all-NaN placeholder instead.
choices['Enrol Again'] = safe_get(
    choices,
    enrol_again_col,
    default=pd.Series([np.nan] * len(choices), index=choices.index),
)

# Merge student information.
# A left join keeps every choice row; rows whose 'Student ID' has no match in
# `students` get NaN in the student columns.
report = choices.merge(students, on='Student ID', how='left')

# Example aggregation used downstream: one row per (Course Code, Course Name)
#   Selections - number of rows with a non-null 'Student ID'
#   EnrolAgain - number of rows with a non-null 'Enrol Again' value
# dropna=False matters here: a course code that was not found in the course
# map has a NaN 'Course Name', and groupby's default (dropna=True) would
# silently drop all of that course's selections from the report.
summary = (report
           .groupby(['Course Code', 'Course Name'], dropna=False)
           .agg(Selections=('Student ID', 'count'),
                # 'count' tallies non-NA values, same as notna().sum()
                EnrolAgain=('Enrol Again', 'count'))
           .reset_index())


## Output

In [None]:
# Preview the first rows of the per-course summary (head() defaults to 5 rows).
summary.head()