In [5]:
import pandas as pd
import numpy as np
from collections import defaultdict
from typing import Dict, Set, Optional, List

In [None]:
# FIXME

# bug fixes and verify performance
# documentation
# convert into script to run from command line
# create image with student names on seating chart

In [7]:
def compute_weighted_score(row, exam_columns, weights=None):
    scores = row[exam_columns].astype(float)
    valid = scores.notna()
    scores = scores[valid]

    if len(scores) == 0:
        return np.nan

    if weights is None:
        w = np.linspace(1.0, 1.3, len(scores))
    else:
        w = np.array(weights)[valid.values]

    return np.average(scores, weights=w)


def load_and_score_students(csv_path, exam_weights=None):
    df = pd.read_csv(csv_path)
    exam_columns = [c for c in df.columns if c.lower().startswith("exam")]

    if exam_weights:
        weights = [exam_weights.get(c, 1.0) for c in exam_columns]
    else:
        weights = None

    df["avg_score"] = df.apply(
        compute_weighted_score,
        axis=1,
        exam_columns=exam_columns,
        weights=weights,
    )

    return df.dropna(subset=["avg_score"])


def build_pair_history(df: pd.DataFrame) -> Dict[str, Set[str]]:
    """
    Build student -> previously grouped-with set
    using all group* columns.
    """
    history = defaultdict(set)
    group_cols = [c for c in df.columns if c.startswith("group")]

    for col in group_cols:
        for _, g in df.groupby(col):
            names = g["name"].dropna().tolist()
            for a in names:
                for b in names:
                    if a != b:
                        history[a].add(b)

    return history


def next_group_column(df: pd.DataFrame) -> str:
    group_cols = [c for c in df.columns if c.startswith("group")]
    if not group_cols:
        return "group1"

    nums = [int(c.replace("group", "")) for c in group_cols]
    return f"group{max(nums) + 1}"


def has_conflict(candidate: List[dict], history: Dict[str, Set[str]]) -> bool:
    for i in range(len(candidate)):
        for j in range(i + 1, len(candidate)):
            if candidate[j]["name"] in history[candidate[i]["name"]]:
                return True
    return False


def assign_new_groups(df: pd.DataFrame) -> pd.DataFrame:
    """
    Assign a new group column while avoiding previous pairings.
    """
    df = df.sort_values("avg_score").copy()
    history = build_pair_history(df)
    new_col = next_group_column(df)

    students = df.to_dict("records")
    n = len(students)
    num_groups = n // 3

    low = students[:num_groups]
    mid = students[num_groups:2 * num_groups]
    high = students[2 * num_groups:3 * num_groups]
    leftovers = students[3 * num_groups:]

    group_assignments = {}
    group_id = 1

    for h, m, l in zip(high, mid, low):
        candidate = [h, m, l]

        # If conflict, allow anyway (soft constraint)
        if has_conflict(candidate, history):
            pass

        for s in candidate:
            group_assignments[s["name"]] = group_id

        group_id += 1

    # Handle leftover students (groups of 2)
    for i in range(0, len(leftovers), 2):
        for s in leftovers[i:i + 2]:
            group_assignments[s["name"]] = group_id
        group_id += 1

    df[new_col] = df["name"].map(group_assignments)
    return df


def run_grouping(
    csv_path: str,
    output_csv: str,
    exam_weights: Optional[Dict[str, float]] = None,
):
    df = load_and_score_students(csv_path, exam_weights)
    df = assign_new_groups(df)
    df.to_csv(output_csv, index=False)
    return df


In [None]:
run_grouping(
    csv_path="students.csv",
    output_csv="students_with_groups.csv"
)

Unnamed: 0,name,exam1,exam2,exam3,avg_score,group1,group2
0,Diana,64,68,70,67.594203,1,1
1,Kevin,67,69,71,69.173913,2,2
2,George,70,73,75,72.884058,3,3
3,Bob,72,75,78,75.26087,1,1
4,Ian,78,80,82,80.173913,2,2
5,Evan,81,84,86,83.884058,3,3
6,Hannah,85,87,89,87.173913,1,1
7,Alice,88,90,92,90.173913,2,2
8,Fiona,90,91,93,91.463768,3,3
9,Julia,92,94,96,94.173913,4,4
