# **The 3 key features of this project are**

## 1) Gender & CGPA Diversity: Within each tutorial group, students are first separated by gender, then sorted by CGPA in descending order for one gender and ascending order for the other. Pairing the two sorted lists position by position means the first two members of each team differ in both gender and CGPA, promoting balanced diversity from the start.

## 2) Alternating Grouping: The remaining students of each gender are recombined into a single list in alternating order (one from each gender in turn), so the order in which students are assigned to teams stays diverse in both gender and CGPA.

## 3) Dynamic School Inequality Adjustment: School distributions differ from batch to batch, so the school term of a team's score is raised to a power that scales with the batch's own school inequality (the Gini coefficient of its school counts). This adjusts the weight of the school term to each batch's school distribution, aiming for fair representation across diverse batches.

<img src="assets\Explanation\1. Intro.png" width=750px>

<img src="assets\Explanation\2. Read records.csv(1).png" width=750px>
<img src="assets\Explanation\2. Read records.csv(2).png" width=750px>



In [1]:
# Create a function to read CSV file as lists
def read_csv_to_list(file_path):
    """
    Read a comma-separated text file into a list of rows.

    Every line is stripped of surrounding whitespace and split on each comma,
    so quoted fields that contain commas are not supported.

    Parameters:
    file_path (str): Path of the file to read; it is decoded as UTF-8.

    Returns:
    list: One list of string fields per line of the file, in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return [line.strip().split(',') for line in file]

# Create a function to read CSV file as dictionaries, with column headers as keys, content in each rows as values
def read_csv_to_dict(file_path):
    """
    Read a CSV file into a list of dictionaries, simulating csv.DictReader.

    The first non-blank line supplies the column headers; every later
    non-blank line becomes one dictionary mapping header -> field. Lines are
    split on each comma, so quoted fields that contain commas are not
    supported.

    Parameters:
    file_path (str): Path of the CSV file; it is decoded as UTF-8, the same
        encoding read_csv_to_list uses.

    Returns:
    list: One dictionary per data row, in file order. Empty when the file
        contains no header line.

    Raises:
    IndexError: If a data row has fewer fields than the header row.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Blank lines (typically a trailing newline at the end of the file)
        # carry no record; skip them instead of failing on a one-field row.
        data = [line.strip().split(',') for line in file if line.strip()]

    # An empty file has no header to build dictionaries from
    if not data:
        return []

    headers = data[0]
    return [
        {header: row[i] for i, header in enumerate(headers)}
        for row in data[1:]
    ]

<img src="assets\Explanation\2. Read records.csv(2).png" width=750px>


In [2]:
# Location of the student records file
file_path = 'assets/records.csv'

# Load every line of the file as a list of fields
csv_reader = read_csv_to_list(file_path)

# Preview at most the first 52 rows (the header row is one of them)
for row in csv_reader[:52]:
    print(row)
num_of_rows = min(len(csv_reader), 52)

['Tutorial Group', 'Student ID', 'School', 'Name', 'Gender', 'CGPA']
['G-1', '5002', 'CCDS', 'Aarav Singh', 'Male', '4.02']
['G-1', '3838', 'EEE', 'Aarti Nair', 'Female', '4.05']
['G-1', '2091', 'EEE', 'Adlan Bin Rahman', 'Male', '4.2']
['G-1', '288', 'CoB (NBS)', 'Ajay Verma', 'Male', '4.01']
['G-1', '4479', 'CCDS', 'Amelia Kim', 'Female', '4.11']
['G-1', '5708', 'SoH', 'Ananya Ramesh', 'Male', '4.2']
['G-1', '4563', 'WKW SCI', 'Anjali Patel', 'Female', '4.01']
['G-1', '3989', 'WKW SCI', 'Anthony Liu', 'Male', '4.15']
['G-1', '2115', 'EEE', 'Anya Kumar', 'Female', '4.03']
['G-1', '1383', 'CoB (NBS)', 'Areeba Khan', 'Female', '4.19']
['G-1', '1417', 'CoE', 'Darren Lee', 'Male', '4.12']
['G-1', '1075', 'CoB (NBS)', 'Felix Yip', 'Male', '4.08']
['G-1', '3148', 'EEE', 'Gabriel Young', 'Male', '3.88']
['G-1', '4402', 'CCDS', 'Grace Turner', 'Female', '4.08']
['G-1', '945', 'MAE', 'Han Li', 'Female', '4.1']
['G-1', '2230', 'CoB (NBS)', 'Harlow Wang', 'Female', '4.18']
['G-1', '4520', 'EEE',

<img src="assets\Explanation\4. Collect tutorial groups.png" width=750px>

In [3]:
# Step 1: Collect the distinct tutorial group labels found in the records
dict_reader = read_csv_to_dict(file_path)
tutorial_groups = {row['Tutorial Group'] for row in dict_reader}

# Step 2: Give every tutorial group 10 empty team lists to be filled later
tutorial_groupings_dict = {}
for group in tutorial_groups:
    tutorial_groupings_dict[group] = [[] for _ in range(10)]

<img src="assets\Explanation\5. Convert to indexed dictionary.png" width=750px>


In [4]:
# Function to convert CSV to a dictionary with row index as keys
def csv_to_indexed_dict(csv_file):
    """
    Load a CSV file as a dictionary keyed by row position.

    Parameters:
    csv_file (str): Path of the CSV file, passed on to read_csv_to_dict.

    Returns:
    dict: Maps 0, 1, 2, ... to the row dictionaries returned by
        read_csv_to_dict, in file order (0 is the first data row).
    """
    return dict(enumerate(read_csv_to_dict(csv_file)))

# Build the position-indexed records once; insert_initial later selects each
# batch from this dictionary by its integer keys.
csv_file = 'assets/records.csv'  # Replace with your CSV file name
records_indexed_dict = csv_to_indexed_dict(csv_file)
#print(records_indexed_dict)
#records_indexed_dict

<img src="assets\Explanation\6. Gini coefficienct gender(1).png" width=750px>
<img src="assets\Explanation\6. Gini coefficienct gender(2).png" width=750px>


In [5]:
def calculate_gini_coefficient_gender(males, females):
    """
    Score the gender imbalance of a team, including a size penalty.

    The imbalance term is the sum of all pairwise absolute differences
    between the two gender counts, divided by 2 * total ** 2. A penalty of
    2 * total ** 4 is then added, where total = males + females.

    Parameters:
    males (int): Number of male students.
    females (int): Number of female students.

    Returns:
    float: 0.0 when there are no students, otherwise imbalance plus penalty.
    """
    head_count = males + females
    if head_count == 0:
        # Nothing to compare, and the division below must be avoided
        return 0.0

    # With only two categories, the pairwise differences reduce to the
    # male/female gap counted once in each direction.
    pairwise_gap = 2 * abs(males - females)
    size_penalty = 2 * head_count ** 4
    return pairwise_gap / (2 * head_count * head_count) + size_penalty

<img src="assets\Explanation\7. Gini coefficient school (1).png" width=750px>
<img src="assets\Explanation\7. Gini coefficient school (2).png" width=750px>


In [6]:
def calculate_gini_coefficient_school(school_dict):
    """
    Score the school inequality of a team, including a penalty term.

    The Gini coefficient of the per-school student counts is computed as the
    sum of all pairwise absolute differences divided by 2 * n * total, where
    n is the number of schools in school_dict. A penalty of 2 * n ** 2 is
    then added.

    Parameters:
    school_dict (dict): Maps a school name to its number of students.

    Returns:
    float: 0.0 when school_dict is empty, otherwise Gini coefficient plus
        penalty. When every count is zero the Gini term is taken as 0.
    """
    student_counts = list(school_dict.values())
    n = len(student_counts)

    if n == 0:
        return 0.0  # No schools

    # NOTE(review): n is the number of distinct schools, so this penalty
    # rises with every additional school - confirm that is the intent.
    penalty_factor = 2 * n ** 2

    total_students = sum(student_counts)
    if total_students == 0:
        # All counts are zero: perfectly equal, and dividing by the total
        # would raise ZeroDivisionError.
        return float(penalty_factor)

    # Sum of absolute differences over every ordered pair of schools
    numerator = 0
    for count_a in student_counts:
        for count_b in student_counts:
            numerator += abs(count_a - count_b)

    return numerator / (2 * n * total_students) + penalty_factor

<img src="assets\Explanation\8. Gini coefficient school no penalty (1).png" width=750px>
<img src="assets\Explanation\8. Gini coefficient school no penalty (2).png" width=750px>
.

In [7]:
def calculate_gini_coefficient_school_no_penalty(school_dict):
    """
    Compute the Gini coefficient of per-school student counts.

    The coefficient is the sum of all pairwise absolute differences between
    the counts, divided by 2 * n * total, where n is the number of schools.
    No penalty term is added.

    Parameters:
    school_dict (dict): Maps a school name to its number of students.

    Returns:
    float: The Gini coefficient; 0.0 when school_dict is empty or when every
        count is zero.
    """
    student_counts = list(school_dict.values())
    n = len(student_counts)

    if n == 0:
        return 0.0  # No schools

    total_students = sum(student_counts)
    if total_students == 0:
        # All counts are zero: perfectly equal, and dividing by the total
        # would raise ZeroDivisionError.
        return 0.0

    # Sum of absolute differences over every ordered pair of schools
    numerator = 0
    for count_a in student_counts:
        for count_b in student_counts:
            numerator += abs(count_a - count_b)

    return numerator / (2 * n * total_students)

<img src="assets\Explanation\9. Gini coefficient cgpa (1).png" width=750px>
<img src="assets\Explanation\9. Gini coefficient cgpa (2).png" width=750px>



In [8]:
def calculate_gini_coefficient_cgpa(cgpas):
    """
    Score the CGPA inequality of a team, including a size penalty.

    The Gini coefficient of the CGPA values is computed as the sum of all
    pairwise absolute differences divided by 2 * n * total, where n is the
    number of values. A penalty of 2 * n ** 2 is then added.

    Parameters:
    cgpas (list): CGPA values as numbers or numeric strings; each one is
        converted with float().

    Returns:
    float: 0.0 when cgpas is empty, otherwise Gini coefficient plus penalty.
        When the values sum to zero the Gini term is taken as 0.
    """
    n = len(cgpas)

    if n == 0:
        return 0.0  # No CGPAs

    values = [float(value) for value in cgpas]
    penalty_factor = 2 * n ** 2

    total_cgpa = sum(values)
    if total_cgpa == 0:
        # Dividing by a zero total would raise ZeroDivisionError
        return float(penalty_factor)

    # Sum of absolute differences over every ordered pair of students
    numerator = 0
    for value_a in values:
        for value_b in values:
            numerator += abs(value_a - value_b)

    return numerator / (2 * n * total_cgpa) + penalty_factor


<img src="assets\Explanation\10. Calculate stats (1).png" width=750px>
<img src="assets\Explanation\10. Calculate stats (2).png" width=750px>


In [9]:
def calculate_stats(list_of_students,school_inequality):
    """
    Combine gender, school and CGPA inequality into one score for a team.

    Parameters:
    list_of_students (list): Student dictionaries with 'Gender', 'School'
        and 'CGPA' keys.
    school_inequality (float): Multiplied by 3 to form the exponent applied
        to the school term.

    Returns:
    float: 0.4 * gender term + 0.4 * school term + 0.2 * CGPA term, where the
        gender term is cubed and the CGPA term is squared.
    """
    male_count = 0
    female_count = 0
    school_counts = {}
    cgpa_values = []
    for member in list_of_students:
        # Any gender other than 'Male' is counted on the female side
        if member['Gender'] == 'Male':
            male_count += 1
        else:
            female_count += 1
        school_name = member['School']
        school_counts[school_name] = school_counts.get(school_name, 0) + 1
        cgpa_values.append(member['CGPA'])

    gender_term = calculate_gini_coefficient_gender(male_count, female_count) ** 3
    school_term = calculate_gini_coefficient_school(school_counts) ** (3 * school_inequality)
    cgpa_term = calculate_gini_coefficient_cgpa(cgpa_values) ** 2
    return 0.4 * gender_term + 0.4 * school_term + 0.2 * cgpa_term
    


<img src="assets\Explanation\11. Merge dictionaries (1).png" width=750px>
<img src="assets\Explanation\11. Merge dictionaries (2).png" width=300px>
<img src="assets\Explanation\11. Merge dictionaries (3).png" width=750px>


In [10]:
def merge_dictionaries(dict1, dict2):
    """
    Interleave the values of two dictionaries into one re-indexed dictionary.

    Values are taken alternately, dict1 first, each in its own dictionary's
    order. Once the shorter dictionary runs out, the rest of the longer one
    follows.

    Parameters:
    dict1 (dict): Supplies the 1st, 3rd, 5th, ... values.
    dict2 (dict): Supplies the 2nd, 4th, 6th, ... values.

    Returns:
    tuple: (new_dict, num_pairs), where new_dict maps 0, 1, 2, ... to the
        interleaved values and num_pairs is the number of entries.
    """
    # Materialise each value sequence once; rebuilding the key list on every
    # iteration made the merge quadratic in the dictionary size.
    first_values = list(dict1.values())
    second_values = list(dict2.values())

    values_list = []
    for i in range(max(len(first_values), len(second_values))):
        if i < len(first_values):
            values_list.append(first_values[i])
        if i < len(second_values):
            values_list.append(second_values[i])

    new_dict = dict(enumerate(values_list))
    return new_dict, len(new_dict)

<img src="assets\Explanation\12. Sorter.png" width=750px>


In [11]:
def sorter(student_dict, descending=False):
    """
    Sort a dictionary of students by CGPA.

    CGPA values are compared numerically via float(), so values read from a
    CSV file as strings order correctly (as text, "10.0" would sort before
    "9.5"). The sort is stable: students with equal CGPA keep their original
    relative order in both directions.

    Parameters:
    student_dict (dict): A dictionary whose values are dictionaries with a "CGPA" entry.
    descending (bool): If True, sort in descending order; otherwise, sort in ascending order.

    Returns:
    dict: A new dictionary with the same keys and values, ordered by CGPA.
    """
    ordered_items = sorted(
        student_dict.items(),
        key=lambda item: float(item[1]["CGPA"]),
        reverse=descending,
    )
    return dict(ordered_items)


# Guide to use sorter; the examples sit inside a string literal, so they are
# not executed.
'''
# Sort in ascending order
sorted_student_dict_ascending = sorter(student_dict)

# Sort in descending order
sorted_student_dict_descending = sorter(student_dict, descending=True)

'''


'\n# Sort in ascending order\nsorted_student_dict_ascending = sorter(student_dict)\n\n# Sort in descending order\nsorted_student_dict_descending = sorter(student_dict, descending=True)\n\n'

<img src="assets\Explanation\13. Insert initial (1).png" width=750px>
<img src="assets\Explanation\13. Insert initial (2).png" width=750px>
<img src="assets\Explanation\13. Insert initial (3).png" width=750px>
<img src="assets\Explanation\13. Insert initial (4).jpg" width=750px>
<img src="assets\Explanation\13. Insert initial (5).png" width=750px>

In [12]:
def insert_initial(batch,records_indexed_dict_copy,tutorial_groupings_dict_copy):
    """
    Seed each team of one tutorial group with one male and one female student.

    A batch is the 50 consecutive records with keys (batch - 1) * 50 up to
    batch * 50 - 1. Males are sorted by ascending CGPA and females by
    descending CGPA; team i receives the i-th student of each list. When a
    batch has fewer than 10 students of a gender, only the available ones are
    placed. Records whose 'Gender' is neither 'Male' nor 'Female' are not
    placed or returned by this function.

    Parameters:
    batch (int): 1-based batch number.
    records_indexed_dict_copy (dict): Maps row position to a student dictionary.
    tutorial_groupings_dict_copy (dict): Maps tutorial group to its list of
        team lists; it is modified in place.

    Returns:
    tuple: (tutorial_groupings_dict_copy, remaining males, remaining females,
        tutorial group label, school inequality of the batch). The remaining
        dictionaries are keyed by sorted position, starting at 10.
    """
    batch_size = 50  # records per batch
    team_count = 10  # teams seeded per tutorial group

    starting_index = (batch - 1) * batch_size
    ending_index = batch * batch_size
    tut_grp = records_indexed_dict_copy[starting_index]['Tutorial Group']
    batch_records = {k: v for k, v in records_indexed_dict_copy.items() if starting_index <= k < ending_index}

    # Count students per school to measure how uneven this batch is
    schools = {}
    for value in batch_records.values():
        # Extract the value for the "School" key in the nested dictionary
        school = value.get('School')
        schools[school] = schools.get(school, 0) + 1
    school_inequality = calculate_gini_coefficient_school_no_penalty(schools)

    male_records = {k: v for k, v in batch_records.items() if v['Gender'] == 'Male'}
    female_records = {k: v for k, v in batch_records.items() if v['Gender'] == 'Female'}

    # Re-key each sorted list by position so position i feeds team i
    ascending_males = dict(enumerate(sorter(male_records).values()))
    descending_females = dict(enumerate(sorter(female_records, descending=True).values()))

    teams = tutorial_groupings_dict_copy[tut_grp]
    for i in range(team_count):
        # A batch may hold fewer than team_count students of one gender;
        # seed what exists instead of failing on the missing position.
        if i in ascending_males:
            teams[i].append(ascending_males[i])
        if i in descending_females:
            teams[i].append(descending_females[i])

    remaining_males = {k: v for k, v in ascending_males.items() if k >= team_count}
    remaining_females = {k: v for k, v in descending_females.items() if k >= team_count}
    return tutorial_groupings_dict_copy,remaining_males,remaining_females,tut_grp,school_inequality

<img src="assets\Explanation\14. Insert Rest (1).png" width=680px>
<img src="assets\Explanation\14. Insert Rest (2).png" width=750px>
<img src="assets\Explanation\14. Insert Rest (3).png" width=750px>
<img src="assets\Explanation\14. Insert Rest (4).png" width=750px>
<img src="assets\Explanation\14. Insert Rest (5).png" width=500px>
<img src="assets\Explanation\14. Insert Rest (6).png" width=750px>


In [13]:
def insert_rest(tut_grp,tutorial_groupings_dict_copy,males,females,school_inequality):
    """
    Place the remaining students of a tutorial group into its teams, one at a time.

    The remaining males and females are interleaved with merge_dictionaries.
    Each student joins the team whose calculate_stats score rises the least
    when the student is added; teams that already have 5 members get a very
    large placeholder change instead. Ties go to the lowest team index.

    Parameters:
    tut_grp (str): Key of the tutorial group being filled.
    tutorial_groupings_dict_copy (dict): Maps tutorial group to its list of
        team lists; it is modified in place.
    males (dict): Remaining male students.
    females (dict): Remaining female students.
    school_inequality (float): Passed through to calculate_stats.

    Returns:
    dict: The same tutorial_groupings_dict_copy object, after insertion.
    """
    def score_increase(team, newcomer):
        # Change in the team's score if the newcomer were added
        before = calculate_stats(team, school_inequality)
        after = calculate_stats(team + [newcomer], school_inequality)
        return after - before

    queue, queue_length = merge_dictionaries(males, females)
    for position in range(queue_length):
        newcomer = queue[position]
        teams = tutorial_groupings_dict_copy[tut_grp]
        deltas = [
            score_increase(teams[j], newcomer) if len(teams[j]) < 5 else 9999999999999999
            for j in range(10)
        ]
        # The first team with the smallest increase receives the student
        best_team = deltas.index(min(deltas))
        teams[best_team].append(newcomer)
    return tutorial_groupings_dict_copy
    

<img src="assets\Explanation\15. Sort-output function.png" width=750px>


In [14]:
def sort(num_batch,tutorial_groupings_dict,records_indexed_dict):
    """
    Allocate the students of the first num_batch batches to teams.

    For each batch, insert_initial seeds the teams and insert_rest places the
    remaining students.

    Parameters:
    num_batch (int): Number of batches to process, numbered from 1.
    tutorial_groupings_dict (dict): Maps tutorial group to its list of team
        lists. It is used as a template and is not modified.
    records_indexed_dict (dict): Maps row position to a student dictionary.

    Returns:
    dict: A new dictionary with the same keys as tutorial_groupings_dict,
        holding the filled team lists.
    """
    # Copy the outer dictionary and every team list: plain assignment only
    # aliases the template, so filling it in place would make a second run
    # append every student again.
    groupings = {
        group: [list(team) for team in teams]
        for group, teams in tutorial_groupings_dict.items()
    }

    for batch in range(1, num_batch + 1):
        groupings,remaining_batch_males,remaining_batch_females,tut_grp,school_inequality=insert_initial(batch,records_indexed_dict,groupings)
        groupings=insert_rest(tut_grp,groupings,remaining_batch_males,remaining_batch_females,school_inequality)
    return groupings

# Run the allocation for batches 1 to 120; insert_initial treats each batch
# as 50 consecutive records.
output=sort(120,tutorial_groupings_dict,records_indexed_dict)
#print(output)


<img src="assets\Explanation\16. Evaluation (1).png" width=750px>


In [15]:
# Tutorial group inspected by the evaluation cells
check_group = "G-88"
# Number of students in the first team of that group
len(output[check_group][0])

5

In [16]:
# Print the size of each of the 10 teams in the checked tutorial group
for j in range(10):
    print(len(output[check_group][j]))

5
5
5
5
5
5
5
5
5
5


<img src="assets\Explanation\16. Evaluation (2).png" width=750px>


In [17]:
def count_males_in_nested_lists(data):
    """
    Count the male students in each team.

    Parameters:
    data (list): A list of teams, each a list of student dictionaries.

    Returns:
    list: For each team, in order, the number of dictionaries whose 'Gender'
        entry equals 'Male'. A missing 'Gender' key does not count.
    """
    return [
        len([person for person in team if person.get('Gender') == 'Male'])
        for team in data
    ]



# Count the male students in every team of the checked tutorial group
result = count_males_in_nested_lists(output[check_group])
print(result)  # This will give you a list with the count of "Male" in each nested list


[3, 3, 3, 3, 3, 2, 3, 2, 2, 3]


<img src="assets\Explanation\16. Evaluation (3).png" width=750px>


In [18]:
from collections import Counter

def count_schools_in_nested_lists(data):
    """
    Tally the schools represented in each team.

    Parameters:
    data (list): A list of teams, each a list of student dictionaries.

    Returns:
    list: One Counter per team, in order, mapping each 'School' value to the
        number of students with it. Dictionaries without a 'School' key are
        ignored.
    """
    return [
        Counter(member['School'] for member in team if 'School' in member)
        for team in data
    ]



# Tally the schools in every team of the checked tutorial group and print
# one line per team, numbered from 1
result = count_schools_in_nested_lists(output[check_group])
for i, school_count in enumerate(result):
    print(f"List {i+1}: {school_count}")


List 1: Counter({'EEE': 3, 'ADM': 1, 'CoB (NBS)': 1})
List 2: Counter({'MSE': 2, 'CoB (NBS)': 2, 'EEE': 1})
List 3: Counter({'CCEB': 2, 'CCDS': 1, 'SBS': 1, 'CoE': 1})
List 4: Counter({'SoH': 2, 'CCDS': 1, 'EEE': 1, 'CoB (NBS)': 1})
List 5: Counter({'CoE': 2, 'CCEB': 2, 'NIE': 1})
List 6: Counter({'SoH': 3, 'SSS': 1, 'LKCMedicine': 1})
List 7: Counter({'CCEB': 2, 'MAE': 2, 'SPMS': 1})
List 8: Counter({'CCDS': 2, 'CEE': 1, 'WKW SCI': 1, 'EEE': 1})
List 9: Counter({'CoB (NBS)': 3, 'EEE': 2})
List 10: Counter({'MAE': 3, 'SBS': 2})


<img src="assets\Explanation\16. Evaluation (4).png" width=750px>
<img src="assets\Explanation\16. Evaluation (5).png" width=750px>

In [19]:
import math

def calculate_standard_deviation(data):
    """
    Compute the population standard deviation of a sequence of numbers.

    Parameters:
    data (list): Numeric values; must not be empty, because the mean divides
        by the number of values.

    Returns:
    float: Square root of the mean squared deviation from the mean.
    """
    count = len(data)
    average = sum(data) / count
    squared_deviations = [(value - average) ** 2 for value in data]
    return math.sqrt(sum(squared_deviations) / count)

# List to store standard deviations
std_devs = []

# Calculate standard deviation for each of the 10 lists
for sublist in output[check_group]:
    cgpa_values = [float(d['CGPA']) for d in sublist]
    std_dev = calculate_standard_deviation(cgpa_values)
    std_devs.append(std_dev)

# Output the results
for index, std_dev in enumerate(std_devs, 1):
    print(f"Standard deviation of list {index}: {std_dev:.2f}")

Standard deviation of list 1: 0.14
Standard deviation of list 2: 0.13
Standard deviation of list 3: 0.11
Standard deviation of list 4: 0.10
Standard deviation of list 5: 0.08
Standard deviation of list 6: 0.08
Standard deviation of list 7: 0.06
Standard deviation of list 8: 0.05
Standard deviation of list 9: 0.06
Standard deviation of list 10: 0.06


__Distribution Charts__

<img src="assets\Explanation\16. Evaluation (6).png" width=750px>
<img src="assets\Explanation\16. Evaluation (7).png" width=750px>
<img src="assets\Explanation\16. Evaluation (8).png" width=750px>

<img src="assets\Explanation\17. Export.png" width=750px>

In [20]:
# Name of the CSV file that receives the final allocation
csv_file = 'FDAC_Team5_TohFuTangLevon.csv'

# Column headers: the input columns plus the assigned team number
column_headers = ["Tutorial Group", "Student ID", "School", "Name", "Gender", "CGPA", "Team Assigned"]

# Write as UTF-8, the encoding read_csv_to_list uses for the input, so names
# with non-ASCII characters survive on platforms with another default encoding.
with open(csv_file, mode='w', encoding='utf-8') as file:
    # Write the column headers
    file.write(','.join(column_headers) + '\n')

    # Order tutorial groups by the number after the dash ("G-2" before "G-10")
    sorted_keys = sorted(output.keys(), key=lambda x: int(x.split('-')[1]))

    for tutorial_group in sorted_keys:
        teams = output[tutorial_group]
        # Teams are numbered from 1 in the exported file
        for team_number, team in enumerate(teams, start=1):
            for student in team:
                # Student details followed by the team number
                row = [
                    tutorial_group,
                    student['Student ID'],
                    student['School'],
                    student['Name'],
                    student['Gender'],
                    student['CGPA'],
                    team_number
                ]
                # Fields are joined with plain commas; no quoting is applied
                file.write(','.join(map(str, row)) + '\n')