In [17]:
import csv
import itertools
import os
import random
import sys


def read_students(start_index=0, count=50, filepath="records.csv"):
    """Read up to count students starting from start_index (after header).
    Returns a list of student dicts with keys: tutorial group, id, school, name, gender, gpa
    """
    students = []
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"{filepath} not found")

    with open(filepath, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        headers = next(reader, None)  # skip header if present

        # skip start_index rows
        for _ in range(start_index):
            try:
                next(reader)
            except StopIteration:
                return students

        for _ in range(count):
            try:
                row = next(reader)
            except StopIteration:
                break
            if len(row) < 6:
                # skip malformed line
                continue
            try:
                gpa = float(row[5])
            except Exception:
                # skip rows with non-numeric GPA
                continue

            student = {
                'tutorial group': row[0].strip(),
                'id': row[1].strip(),
                'school': row[2].strip(),
                'name': row[3].strip(),
                'gender': row[4].strip(),
                'gpa': gpa,
            }
            students.append(student)

    return students
    
read_students(start_index=0, count=50, filepath="records.csv")


[{'tutorial group': 'G-1',
  'id': '5002',
  'school': 'CCDS',
  'name': 'Aarav Singh',
  'gender': 'Male',
  'gpa': 4.02},
 {'tutorial group': 'G-1',
  'id': '3838',
  'school': 'EEE',
  'name': 'Aarti Nair',
  'gender': 'Female',
  'gpa': 4.05},
 {'tutorial group': 'G-1',
  'id': '2091',
  'school': 'EEE',
  'name': 'Adlan Bin Rahman',
  'gender': 'Male',
  'gpa': 4.2},
 {'tutorial group': 'G-1',
  'id': '288',
  'school': 'CoB (NBS)',
  'name': 'Ajay Verma',
  'gender': 'Male',
  'gpa': 4.01},
 {'tutorial group': 'G-1',
  'id': '4479',
  'school': 'CCDS',
  'name': 'Amelia Kim',
  'gender': 'Female',
  'gpa': 4.11},
 {'tutorial group': 'G-1',
  'id': '5708',
  'school': 'SoH',
  'name': 'Ananya Ramesh',
  'gender': 'Male',
  'gpa': 4.2},
 {'tutorial group': 'G-1',
  'id': '4563',
  'school': 'WKW SCI',
  'name': 'Anjali Patel',
  'gender': 'Female',
  'gpa': 4.01},
 {'tutorial group': 'G-1',
  'id': '3989',
  'school': 'WKW SCI',
  'name': 'Anthony Liu',
  'gender': 'Male',
  'gpa':

In [18]:
def count_total_students(filepath="records.csv"):
    """Count total number of student records (excluding header)."""
    if not os.path.exists(filepath):
        return 0
        
    with open(filepath, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        headers = next(reader, None)  # skip header
        return sum(1 for row in reader)

        
count_total_students(filepath="records.csv")


6000

In [4]:
def form_groups(students, num_groups=10):
    """Form groups from a list of student dicts.
    Returns list of groups (each group is a list of students).
    """
    if not students:
        return []

    students_sorted = sorted(students, key=lambda x: x['gpa'], reverse=True)

    # split into up to 5 bands as evenly as possible
    n = len(students_sorted)
    band_size = max(1, n // 5)
    bands = [students_sorted[i:i + band_size] for i in range(0, n, band_size)]

    # split each band by gender
    band_males = []
    band_females = []
    for band in bands:
        males = [s for s in band if s['gender'].strip().lower() in ('m', 'male')]
        females = [s for s in band if s['gender'].strip().lower() in ('f', 'female')]
        random.shuffle(males)
        random.shuffle(females)
        band_males.append(males)
        band_females.append(females)

    total_males = sum(len(m) for m in band_males)
    total_females = sum(len(f) for f in band_females)

    if total_males > total_females:
        target_males_per_group, target_females_per_group = 3, 2
    else:
        target_males_per_group, target_females_per_group = 2, 3

    groups = [[] for _ in range(num_groups)]
    male_count = [0] * num_groups
    female_count = [0] * num_groups

    # distribute one student from each band into each group
    for band_idx in range(len(bands)):
        for group_idx in range(num_groups):
            # try to pick a student to respect the gender targets
            picked = False
            if male_count[group_idx] < target_males_per_group and band_males[band_idx]:
                groups[group_idx].append(band_males[band_idx].pop())
                male_count[group_idx] += 1
                picked = True
            elif female_count[group_idx] < target_females_per_group and band_females[band_idx]:
                groups[group_idx].append(band_females[band_idx].pop())
                female_count[group_idx] += 1
                picked = True
            elif band_males[band_idx]:
                groups[group_idx].append(band_males[band_idx].pop())
                male_count[group_idx] += 1
                picked = True
            elif band_females[band_idx]:
                groups[group_idx].append(band_females[band_idx].pop())
                female_count[group_idx] += 1
                picked = True

    return groups

[[{'tutorial group': 'G-1', 'id': '2091', 'school': 'EEE', 'name': 'Adlan Bin Rahman', 'gender': 'Male', 'gpa': 4.2}, {'tutorial group': 'G-1', 'id': '5708', 'school': 'SoH', 'name': 'Ananya Ramesh', 'gender': 'Male', 'gpa': 4.2}], [{'tutorial group': 'G-1', 'id': '4520', 'school': 'EEE', 'name': 'Henry Foster', 'gender': 'Male', 'gpa': 4.11}, {'tutorial group': 'G-1', 'id': '1417', 'school': 'CoE', 'name': 'Darren Lee', 'gender': 'Male', 'gpa': 4.12}, {'tutorial group': 'G-1', 'id': '3989', 'school': 'WKW SCI', 'name': 'Anthony Liu', 'gender': 'Male', 'gpa': 4.15}, {'tutorial group': 'G-1', 'id': '1841', 'school': 'MAE', 'name': 'Jett Morales', 'gender': 'Male', 'gpa': 4.12}, {'tutorial group': 'G-1', 'id': '2776', 'school': 'CCEB', 'name': 'Siddharth Nair', 'gender': 'Male', 'gpa': 4.14}], [{'tutorial group': 'G-1', 'id': '588', 'school': 'MAE', 'name': 'Lucas Walker', 'gender': 'Male', 'gpa': 4.06}, {'tutorial group': 'G-1', 'id': '1075', 'school': 'CoB (NBS)', 'name': 'Felix Yip', 

In [16]:
    
# Sort by GPA band for display (Band 1 = highest GPA, Band 5 = lowest GPA)

for student in g:
        # Determine which band the student came from
        band_number = None
        for band_idx, band in enumerate(bands):
            original_band_students = students_sorted[band_idx*10:(band_idx+1)*10]
            if student in original_band_students:
                band_number = band_idx + 1
                break
        
        gender_lower = student['gender'].lower()
        if gender_lower in ['m','male']:
            males_in_group += 1
        elif gender_lower in ['f', 'female']:
            females_in_group += 1
        total_gpa += student['gpa']
        print(f"  {student['name']} | {student['school']} | {student['gender']} | GPA: {student['gpa']:.2f} | Band: {band_number}")
    
    avg_gpa = total_gpa / len('gpa')
    print(f"  Summary: {males_in_group} males, {females_in_group} females, Average GPA: {avg_gpa:.2f}")
    print()


IndentationError: unindent does not match any outer indentation level (<string>, line 21)