In [18]:
import pandas as pd
import random

# Target number of students per group.
GROUP_SIZE = 5
# Skills that count toward the skill part of a group's diversity score.
required_skills = {'leadership', 'technical', 'creative', 'research', 'communication'}
# Raw string so the backslashes in the Windows path are taken literally;
# '\T' and '\s' are invalid escape sequences in a normal string literal and
# trigger a SyntaxWarning on recent Python versions.
# NOTE(review): absolute local path -- the notebook only runs on a machine
# that has this exact file; consider a configurable data directory.
df = pd.read_csv(r'D:\Temple\s25\students.csv')

In [19]:
# Preview the first rows of the loaded table to check its columns.
df.head()

Unnamed: 0,name,skills,top5projects
0,Alice,"leadership, communication","project1, project2, project3, project4, project5"
1,Bob,"technical, research","project2, project3, project6, project8, project10"
2,Charlie,"creative, technical","project1, project4, project5, project7, project9"
3,Diana,"research, communication","project2, project4, project6, project8, project10"
4,Evan,"leadership, creative","project1, project3, project7, project8, project9"


In [20]:
def _split_list_field(value):
    """Turn a comma-separated cell into a list of trimmed, lower-cased entries.

    Missing cells (NaN/None) and empty fragments (for example from a trailing
    comma) produce no entries, instead of bogus values such as 'nan' or ''
    that would later be counted as a real skill or project.
    """
    if pd.isna(value):
        return []
    return [part.strip().lower() for part in str(value).split(',') if part.strip()]


# One plain dict per student: the name plus normalized skill/project lists.
students = []
for _, row in df.iterrows():
    students.append({
        'name': row['name'],
        'skills': _split_list_field(row['skills']),
        'top5projects': _split_list_field(row['top5projects']),
    })

In [21]:
def diversity_score_group(group, skills_required=None):
    """Score a group by the distinct skills and project interests it covers.

    Args:
        group: Iterable of student dicts. Each dict must provide 'skills' and
            'top5projects' entries holding iterables of strings.
        skills_required: Optional iterable of skill names that count toward
            the score. When omitted, the notebook-level ``required_skills``
            set is used, so existing one-argument calls behave as before.

    Returns:
        int: the number of counted skills present anywhere in the group plus
        the number of distinct projects named by any member. An empty group
        scores 0.
    """
    if skills_required is None:
        # Fall back to the notebook-wide configuration.
        skills_required = required_skills

    group_skills = set()
    group_projects = set()
    for student in group:
        group_skills.update(student['skills'])
        group_projects.update(student['top5projects'])

    # Skills outside the counted set are ignored; every distinct project counts.
    score_skills = len(group_skills.intersection(skills_required))
    score_projects = len(group_projects)
    return score_skills + score_projects

In [22]:
def _gain_from_adding(group, student, score_fn):
    """Return how much ``score_fn`` rises when ``student`` joins ``group``."""
    return score_fn(group + [student]) - score_fn(group)


def generate_partitions(indices, group_size, score_fn=None):
    """Greedily split students into groups, maximising each marginal score gain.

    The students are processed in random order; each one joins the open group
    whose score increases the most (ties go to the lowest-numbered group).

    Args:
        indices: Iterable of student records to partition. Despite the name,
            the notebook passes the student dicts themselves. The input is
            copied, so the caller's list is neither reordered nor modified.
        group_size: Target number of students per group (positive integer).
        score_fn: Optional callable taking a list of students and returning a
            number. Defaults to the notebook's ``diversity_score_group``.

    Returns:
        list[list]: ``len(indices) // group_size`` groups (at least one when
        there are any students). If the student count is not a multiple of
        ``group_size``, the leftover students are spread one at a time over
        the existing groups, which then exceed ``group_size``. An empty input
        yields an empty list.

    Raises:
        ValueError: if ``group_size`` is smaller than 1.
    """
    if group_size < 1:
        raise ValueError("group_size must be a positive integer")

    # Work on a private copy: shuffling must not reorder the caller's data.
    pool = list(indices)
    if not pool:
        return []

    if score_fn is None:
        score_fn = diversity_score_group

    n_groups = max(1, len(pool) // group_size)
    groups = [[] for _ in range(n_groups)]

    # Randomise processing order so the greedy result is not tied to file order.
    random.shuffle(pool)

    capacity = group_size
    for student in pool:
        open_slots = [i for i in range(n_groups) if len(groups[i]) < capacity]
        if not open_slots:
            # Every group is full but students remain: raise the cap by one so
            # the leftovers are distributed instead of crashing on groups[None].
            capacity += 1
            open_slots = list(range(n_groups))

        # max() keeps the first of equally good groups, like a strict '>' scan.
        best_group_index = max(
            open_slots,
            key=lambda i: _gain_from_adding(groups[i], student, score_fn),
        )
        groups[best_group_index].append(student)

    return groups

In [23]:
# Build the groups, passing a shallow copy of the student list and the
# notebook-wide GROUP_SIZE.
# NOTE(review): the grouping shuffles with the `random` module and no seed is
# set in the cells shown here, so results differ from run to run.
groups = generate_partitions(students.copy(), GROUP_SIZE)

In [24]:
# Print a report: one header line per group with its diversity score,
# one line per member, then a blank separator line.
for idx, group in enumerate(groups, 1):  # number groups from 1 for display
    score = diversity_score_group(group)
    print(f"Group {idx} (Diversity Score: {score}):")
    for student in group:
        # Skills and projects are lists; join them for a single-line summary.
        print(f"  - {student['name']} | Skills: {', '.join(student['skills'])} | Projects: {', '.join(student['top5projects'])}")
    print()

Group 1 (Diversity Score: 15):
  - Evan | Skills: leadership, creative | Projects: project1, project3, project7, project8, project9
  - Julia | Skills: creative, leadership | Projects: project2, project4, project6, project8, project10
  - Kevin | Skills: technical, research | Projects: project1, project3, project7, project8, project9
  - Ian | Skills: technical, communication | Projects: project1, project3, project5, project7, project10
  - Mike | Skills: leadership, technical | Projects: project1, project5, project7, project8, project10

Group 2 (Diversity Score: 15):
  - George | Skills: creative, communication | Projects: project1, project4, project6, project8, project10
  - Fiona | Skills: technical, research | Projects: project2, project3, project5, project7, project10
  - Steve | Skills: technical, creative | Projects: project1, project3, project5, project7, project9
  - Hannah | Skills: leadership, research | Projects: project2, project5, project7, project8, project9
  - Tina | 