# Instructions

1. Download the complete grades from Canvas
1. Download the `Individual Student Reports` -> `Score Reports` from the MFT admin portal.  
   This is a PDF and is the only way to get percentile scores for the current cohort.
1. Manually enter the MFT percentile scores in the `ETS Major Field Test` column of the downloaded Canvas grades CSV.
1. Modify the filenames and parameters in the "Constants and adjustable parameters" cell as needed.
1. Run the notebook

In [None]:
import pandas as pd
import scipy.interpolate as interp
import numpy as np
import re
import collections
import operator

# Let pandas print wide frames: up to 200 characters per line and 100 columns.
pd.set_option('display.width', 200)
pd.set_option('display.max_columns', 100)

# Constants and adjustable parameters

In [None]:
# Input gradebook CSV and the two CSV files this notebook writes.
canvas_input = '2025-05-22T1418_Grades-PHYS_497-17796-SP2025.csv'
canvas_output = 'final_grades.csv'
solar_output = 'solar_grades.csv'

# Weight of each grade category in the final grade.  Keys are gradebook column
# names without the trailing ' (<id>)'.
weights = collections.OrderedDict([
    ('Homework and Participation Grade Point', 0.15),
    ('Final Presentation', 0.425),
    ('Final Report', 0.425),
])

# MFT cut-offs: a score strictly above 'boost' raises the final grade one step,
# a score strictly below 'penalty' lowers it one step.
mft_thresholds = collections.OrderedDict(boost=30, penalty=0)

# Rounding thresholds for the final grade, highest cut-off first:
# (weighted score is at least ..., grade value awarded).
final_thresholds = collections.OrderedDict([
    (3.7, 4.0), (3.3, 3.7), (3.0, 3.3),
    (2.7, 3.0), (2.3, 2.7), (2.0, 2.3),
    (1.7, 2.0), (1.3, 1.7), (1.0, 1.3),
    (0.7, 1.0), (0.0, 0),
])

# Per-category "incomplete assignment" thresholds: for each 4-point grade level
# (4, 3, 2, 1, 0) the number of missing assignments associated with it.
# NOTE(review): 'Practice' lists 1 missing assignment for both the 4- and the
# 3-point level.  These counts are used as interpolation x-values in
# calculate_professionalism_grade, where a repeated x-value is ambiguous --
# confirm whether the 4-point entry was meant to be 0.
_GRADE_LEVELS = (4, 3, 2, 1, 0)
professionalism_thresholds = collections.OrderedDict(
    (category, collections.OrderedDict(zip(_GRADE_LEVELS, missing_counts)))
    for category, missing_counts in (
        ('Preparation', (1, 2, 3, 4, 5)),
        ('Participation', (2, 4, 6, 8, 10)),
        ('Practice', (1, 1, 2, 3, 4)),
        ('Peer-Review', (1, 2, 3, 4, 5)),
    )
)

# Letter grades (including plus/minus) and their values on the 4-point scale.
numeric_map = collections.OrderedDict([
    ('A', 4.0),
    ('A-', 3.7),
    ('B+', 3.3),
    ('B', 3.0),
    ('B-', 2.7),
    ('C+', 2.3),
    ('C', 2.0),
    ('C-', 1.7),
    ('D+', 1.3),
    ('D', 1.0),
    ('D-', 0.7),
    ('F', 0.0),
])

# Helper functions

In [None]:
def find_columns(df, names):
    r"""
    Finds and returns the column names in a DataFrame that match the given names followed by ' (\d+)'.

    A column matches a name when it begins with '<name> (<digits>)'.  Names are
    matched literally: regex metacharacters in a name have no special meaning.

    Parameters:
    df (pandas.DataFrame): The DataFrame to search for columns.
    names (iterable of str): Names to match against the column names in the DataFrame.
                             Each name is expected to match a column name followed by ' (\d+)'.

    Returns:
    list of str: One matching column name per entry in names, in the same order.

    Raises:
    ValueError: If no columns are found for a given name or if multiple columns match a given name.

    Example:
    >>> find_columns(canvas, ['Select five CSUNposium talks to attend'])
    ['Select five CSUNposium talks to attend (2211230)']
    """
    columns = []
    for name in names:
        # Escape the name so characters such as '.', '+' or '(' in an
        # assignment title are matched literally rather than as regex syntax.
        pattern = re.compile(re.escape(name) + r' \(\d+\)')
        # Search the DataFrame that was passed in (not a notebook-level global);
        # non-string column labels can never match and are skipped.
        match_columns = [col for col in df.columns if isinstance(col, str) and pattern.match(col)]
        if not match_columns:
            raise ValueError(f"No columns found for: {name}")
        if len(match_columns) > 1:
            raise ValueError(f"Multiple columns found for: {name}")
        columns.append(match_columns[0])
    return columns

def count_complete_assignments(df, columns):
    """
    Counts the number of complete assignments for each row in the specified columns of a DataFrame.

    An assignment counts as complete when its value, converted to a number, is > 0.
    Blank cells, non-numeric text, zeros and negative values count as incomplete.

    Parameters:
    df (pandas.DataFrame): The DataFrame containing the assignment data.
    columns (list of str): A list of column names to evaluate for completeness.

    Returns:
    pandas.Series: A Series, indexed like df, containing the count of complete assignments for each row.

    Example:
    >>> count_complete_assignments(canvas, ['Week 1 Participation (2211279)', 'Week 2 Participation (2211287)'])
    0    0
    1    2
    2    1
    dtype: int64
    """
    # Convert all values to numeric, forcing anything unparseable to NaN
    scores = df[columns].apply(pd.to_numeric, errors='coerce')
    # NaN > 0 is False, so missing entries are not counted.  Counting booleans
    # (instead of summing capped scores) makes partial credit such as 0.5 count
    # as one complete assignment and stops negative scores from reducing the total.
    return scores.gt(0).sum(axis=1)

def calculate_professionalism_grade(df, columns, thresholds=professionalism_thresholds):
    """
    Calculates the professionalism grade for each row from its per-category completion counts.

    Steps:
    1. From the row labelled 1 onward, each count is replaced by (count in row 1) - (count),
       i.e. the number of missing assignments relative to row 1.  Row 1 is presumably the
       Canvas "Points Possible" row -- confirm against the export.  Rows before label 1 are
       left as they are.
    2. Each category column is converted to a 4-point grade by linear interpolation over that
       category's thresholds; counts below the threshold range map to 4 and counts above it to 0.
    3. The four category grades are combined with the anchored rule described in compute_final.

    Parameters:
    df (pandas.DataFrame): The DataFrame containing the per-category completion counts.
    columns (list of str): The category columns to use.  The category name is the column
                           name with any ' (<digits>)' removed.
    thresholds (OrderedDict): Maps category name -> OrderedDict of {grade level: missing count}.

    Returns:
    pandas.Series: The professionalism grade for each row of df.

    Example:
    >>> calculate_professionalism_grade(
    ...     canvas, find_columns(canvas, ['Preparation', 'Participation', 'Practice', 'Peer-Review']))
    """
    def compute_final(grades: pd.Series) -> float:
        """
        Combine one row of category grades into a single anchored grade.

        The anchor is the lowest category grade.  If at least two categories are a full
        point or more above the anchor the raw grade is anchor + 0.7; if exactly one is,
        anchor + 0.3; otherwise the anchor itself.  The raw grade is then snapped down
        to the nearest value present in numeric_map.
        """
        lowest = grades.min()
        categories_above = (grades >= lowest + 1.0).sum()
        if categories_above == 0:
            unsnapped = lowest
        elif categories_above == 1:
            unsnapped = lowest + 0.3
        else:
            unsnapped = lowest + 0.7

        # The small tolerance keeps float noise from dropping a grade by a full step.
        return max(step for step in numeric_map.values() if step <= unsnapped + 1e-8)

    missing = df[columns].copy()

    # Turn completion counts into missing-assignment counts, using row 1 as the reference.
    missing.loc[1:, :] = missing.loc[1, :] - missing.loc[1:, :]

    # Map each category's missing count onto the 4-point scale.
    for column in missing.columns:
        limits = thresholds[re.sub(r' \(\d+\)', '', column)]
        to_grade_points = interp.interp1d(
            list(limits.values()),
            list(limits.keys()),
            bounds_error=False,
            fill_value=(4, 0),
        )
        missing[column] = to_grade_points(missing[column])

    return missing.apply(compute_final, axis=1)

def apply_grade_threshold(x, thresholds=final_thresholds):
    """
    Rounds a numeric grade to the grade value of the first threshold it reaches.

    Thresholds are checked in iteration order, so the mapping must be ordered
    from the highest cut-off to the lowest.

    Parameters:
    x (float): The numeric grade to round.  Missing values (NaN/None) are returned unchanged.
    thresholds (mapping): Maps cut-off -> grade value awarded when x is at least the cut-off.

    Returns:
    The grade value of the first cut-off that x reaches, or the last grade value in
    thresholds when x is below every cut-off.
    """
    if pd.isna(x):
        return x
    for cutoff, grade in thresholds.items():
        # Same 1e-8 tolerance as the professionalism grade snapping: a weighted sum
        # that lands exactly on a cut-off must not fall short through float error.
        if x >= cutoff - 1e-8:
            return grade
    # Below every cut-off: fall back to the lowest grade.  dict views are not
    # subscriptable, so materialise the values before indexing.
    return list(thresholds.values())[-1]


# Read in the canvas spreadsheet

In [None]:
# Load the gradebook exported from Canvas.
canvas = pd.read_csv(canvas_input)
# Discard the final row of the export (presumably a non-student row such as
# the Canvas test student -- confirm against the CSV).
canvas = canvas.drop(index=canvas.index[-1])
canvas

# Read in the MFT results

In [None]:
# MFT percentile scores should be entered directly into the canvas spreadsheet.  
# The custom report spreadsheets do not contain percentile scores.

# mft = pd.read_excel('ets/ETSMajorFieldTestsDYOAR_May-21-2025_11-36-13.xlsx')
# mft

# Calculate the Professionalism score

## Calculate Preparation

In [None]:

# Assignments that count toward the Preparation category
prep_assignments = [
    "Anatomy of a research paper",
    "Next Action",
    "Schedule your recurring meeting with your advisor",
    "Next Action Followup",
    "Read the Whitesides paper",
    "Read the Schön Report",
    "Essay for next year's students",
    "Select five CSUNposium talks to attend",
]
# Resolve each assignment name to its gradebook column ('<name> (<id>)')
prep_assignments_columns = find_columns(canvas, prep_assignments)

# Store the number of completed assignments per row in the Preparation column
prep_total_column = find_columns(canvas, ['Preparation'])[0]
canvas[prep_total_column] = count_complete_assignments(canvas, prep_assignments_columns)
print(canvas[[prep_total_column]])


## Calculate Peer-Review

In [None]:
# Assignments that count toward the Peer-Review category
peer_review_assignments = [
    "Project Outline #1 - peer review",
    "Practice Presentation #1 - peer review",
    "Project Outline #2 - peer review",
    "Project Outline #3 Peer Review",
    "CSUNposium Reviews",
    "Final Report Draft #1 - peer reviews",
    "Practice Presentation #2 - peer reviews",
    "Final Report Draft #2 - peer reviews",
]
# Resolve each assignment name to its gradebook column ('<name> (<id>)')
peer_review_assignments_columns = find_columns(canvas, peer_review_assignments)

# Store the number of completed assignments per row in the Peer-Review column
peer_review_total_column = find_columns(canvas, ['Peer-Review'])[0]
canvas[peer_review_total_column] = count_complete_assignments(canvas, peer_review_assignments_columns)
print(canvas[[peer_review_total_column]])

## Calculate Participation

In [None]:
# Weekly participation assignments for weeks 1-16; week 9 is left out, as in
# the hand-written list this replaces.
participation_assignments = [
    f"Week {week} Participation" for week in range(1, 17) if week != 9
]
# Resolve each assignment name to its gradebook column ('<name> (<id>)')
participation_assignments_columns = find_columns(canvas, participation_assignments)

# Store the number of completed assignments per row in the Participation column
participation_total_column = find_columns(canvas, ['Participation'])[0]
canvas[participation_total_column] = count_complete_assignments(canvas, participation_assignments_columns)
print(canvas[[participation_total_column]])

## Calculate Practice and Drafts

In [None]:
# Assignments that count toward the Practice (practice and drafts) category
practice_assignments = [
    "Create your elevator pitch",
    "Project Outline #1",
    "Practice Presentation #1",
    "Project Outline #2",
    "Project Outline #3",
    "Final Report Draft #1",
    "Practice Presentation #2",
    "Final Report Draft #2",
]
# Resolve each assignment name to its gradebook column ('<name> (<id>)')
practice_assignments_columns = find_columns(canvas, practice_assignments)

# Store the number of completed assignments per row in the Practice column
practice_total_column = find_columns(canvas, ['Practice'])[0]
canvas[practice_total_column] = count_complete_assignments(canvas, practice_assignments_columns)
print(canvas[[practice_total_column]])

## Calculate the Professionalism grade

In [None]:
# Combine the four category counts into one professionalism grade per row and
# store it in the 'Homework and Participation Grade Point' column.
category_columns = find_columns(canvas, ['Preparation', 'Participation', 'Practice', 'Peer-Review'])
professionalism_grades = calculate_professionalism_grade(canvas, category_columns)
professionalism_column = find_columns(canvas, ['Homework and Participation Grade Point'])[0]
canvas[professionalism_column] = professionalism_grades
print(canvas[[professionalism_column]])

# Calculate the final grade without the MFT

In [None]:
# Index the gradebook by student name from here on.
canvas = canvas.set_index('Student')
final_col = find_columns(canvas, ['Final'])[0]

# Weighted sum of the category columns; find_columns returns them in the same
# order as the weights, and non-numeric cells become NaN.
weighted_columns = find_columns(canvas, weights.keys())
category_scores = canvas[weighted_columns].apply(pd.to_numeric, errors='coerce')
canvas[final_col] = category_scores @ list(weights.values())
print(canvas[[final_col]])

# Round each weighted score to a letter-grade value using the final thresholds.
canvas[final_col] = canvas[final_col].apply(apply_grade_threshold, args=(final_thresholds,))
print(canvas[[final_col]])


# Apply the MFT correction

In [None]:
def mod_grade_by_one(x, operator):
    """
    Moves a grade one step up or down the grade scale defined by numeric_map.

    Parameters:
    x (float): The original grade; it must be one of the values in numeric_map.
               Missing values (NaN/None) are returned unchanged.
    operator (callable): Called as operator(position, 1) to get the new position on the
                         sorted scale, e.g. operator.add to move up or operator.sub to
                         move down.  Note that inside this function the parameter hides
                         the `operator` module.

    Returns:
    float: The adjusted grade, clamped to the lowest/highest value of the scale.

    Raises:
    ValueError: If x is not a value on the scale.
    """
    if pd.isna(x):
        return x

    scale = sorted(numeric_map.values())
    shifted = operator(scale.index(x), 1)
    # Stay on the scale: the top grade cannot be raised, the bottom one cannot be lowered.
    clamped = min(max(shifted, 0), len(scale) - 1)
    return scale[clamped]

# Apply the MFT adjustment to the final grade.
final_col = find_columns(canvas, ['Final'])[0]
mft_col = find_columns(canvas, ['ETS Major Field Test'])[0]

# Compare numerically: like the other gradebook columns, the MFT column may have
# been read as text, in which case comparing it with a number would raise.
# Blank or non-numeric entries become NaN and match neither mask.
mft_scores = pd.to_numeric(canvas[mft_col], errors='coerce')

# Scores strictly above the boost threshold raise the final grade one step.
boost_mask = mft_scores > mft_thresholds['boost']
canvas.loc[boost_mask, final_col] = canvas.loc[boost_mask, final_col].apply(lambda x: mod_grade_by_one(x, operator.add))
# Scores strictly below the penalty threshold lower the final grade one step.
drop_mask = mft_scores < mft_thresholds['penalty']
canvas.loc[drop_mask, final_col] = canvas.loc[drop_mask, final_col].apply(lambda x: mod_grade_by_one(x, operator.sub))

print(canvas[final_col])



# Output the final grades for Canvas

In [None]:
# Write the student identification columns plus the computed grade columns to
# the Canvas output CSV.
identity_columns = ['Student', 'ID', 'SIS User ID', 'SIS Login ID', 'Section']
computed_columns = find_columns(
    canvas,
    ['Preparation', 'Practice', 'Participation', 'Peer-Review',
     'Homework and Participation Grade Point', 'ETS Major Field Test', 'Final'],
)
canvas.reset_index()[identity_columns + computed_columns].to_csv(canvas_output, index=False)

# Output the final grades for SOLAR

In [None]:
# Reverse lookup from grade value to letter grade; the first letter listed for
# a value wins, and values not on the scale (including NaN) give None.
letter_for_points = {}
for letter, points in numeric_map.items():
    letter_for_points.setdefault(points, letter)

canvas.loc[:, 'Final Letter Grade'] = canvas[final_col].apply(lambda points: letter_for_points.get(points))
# Write the SIS user id and letter grade to the SOLAR output CSV.
canvas.reset_index()[['SIS User ID', 'Final Letter Grade']].to_csv(solar_output, index=False)