# In [3]:
import pandas as pd
import numpy as np
import random
from collections import defaultdict, Counter
from tabulate import tabulate  # For nice table formatting in console output

# Seed both random number generators for reproducibility: the standard-library
# `random` module and NumPy's global generator are seeded separately.
random.seed(42)
np.random.seed(42)

# 1. Data Loading and Preprocessing
def load_data():
    """Read the input CSV files and derive the lookup tables used for scheduling.

    Reads ``courses.csv``, ``studentCourse.csv``, ``studentNames.csv`` and
    ``teachers.csv`` from the current working directory and prints a short
    summary of the row counts.

    Returns:
        A dict with these keys:
            'courses_df': courses table with repeated 'Course Code' rows dropped.
            'course_codes': the unique course codes from that table.
            'course_enrollments': course code -> list of distinct student names.
            'student_courses': student name -> list of course codes, as listed.
            'sorted_courses': (course code, enrollment count) pairs, largest first.
            'teachers': values of the 'Names' column of the teachers table.
            'classrooms': room labels 'C301' through 'C310'.
    """
    courses_df = pd.read_csv('courses.csv')
    student_course_df = pd.read_csv('studentCourse.csv')
    student_names_df = pd.read_csv('studentNames.csv')
    teachers_df = pd.read_csv('teachers.csv')

    print(f"Loaded {len(courses_df)} courses")
    print(f"Loaded {len(student_course_df)} student-course enrollments")
    print(f"Loaded {len(student_names_df)} students")
    print(f"Loaded {len(teachers_df)} teachers")

    # Keep only the first row for every course code.
    courses_df = courses_df.drop_duplicates(subset=['Course Code'])
    print(f"After removing duplicates: {len(courses_df)} unique courses")
    course_codes = courses_df['Course Code'].unique()

    enrolled_course = student_course_df['Course Code']
    enrolled_student = student_course_df['Student Name']

    # Course -> distinct students enrolled in it (empty list when nobody is).
    course_enrollments = {
        code: list(enrolled_student[enrolled_course == code].unique())
        for code in course_codes
    }

    # Student -> every course code listed for that student.
    student_courses = {
        name: enrolled_course[enrolled_student == name].tolist()
        for name in enrolled_student.unique()
    }

    # Largest courses first; the sort is stable, so ties keep course order.
    sorted_courses = sorted(
        ((code, len(students)) for code, students in course_enrollments.items()),
        key=lambda pair: pair[1],
        reverse=True,
    )

    teachers = teachers_df['Names'].tolist()
    classrooms = [f"C{number}" for number in range(301, 311)]

    return {
        'courses_df': courses_df,
        'course_codes': course_codes,
        'course_enrollments': course_enrollments,
        'student_courses': student_courses,
        'sorted_courses': sorted_courses,
        'teachers': teachers,
        'classrooms': classrooms
    }

# 2. Genetic Algorithm Components
class ExamScheduler:
    def __init__(self, data, population_size=50, generations=100, crossover_rate=0.8, mutation_rate=0.2):
        self.data = data
        self.population_size = population_size
        self.generations = generations
        self.crossover_rate = crossover_rate
        self.mutation_rate = mutation_rate
        
        # Define time slots
        self.days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
        self.hours = [9, 10, 11, 12, 13, 14, 15, 16]  # 9 AM to 4 PM (each exam is 1 hour)
        
        # Create all possible time slots (day, hour)
        self.time_slots = [(day, hour) for day in self.days for hour in self.hours]
        
        # Remove Friday 13:00 (1 PM) slot for common break
        self.time_slots.remove(('Friday', 13))
        
        # Track faculty meeting slots (to be determined during scheduling)
        self.faculty_meeting_slots = []
        
    def initialize_population(self):
        """Create initial population of random schedules"""
        population = []
        
        for _ in range(self.population_size):
            # Initialize a chromosome (exam schedule)
            chromosome = []
            
            # Copy course list to assign
            courses_to_assign = list(self.data['course_codes'])
            random.shuffle(courses_to_assign)
            
            # Keep track of assigned slots and teachers
            assigned_slots = {}  # (day, hour, classroom) -> course
            teacher_assignments = {}  # (day, hour) -> list of teachers
            
            for course in courses_to_assign:
                assigned = False
                
                # Try to find a valid slot for this course
                attempts = 0
                while not assigned and attempts < 100:
                    attempts += 1
                    
                    # Randomly select day, hour and classroom
                    day = random.choice(self.days)
                    hour = random.choice(self.hours)
                    
                    # Skip Friday 13:00 (1 PM) slot for common break
                    if day == 'Friday' and hour == 13:
                        continue
                    
                    classroom = random.choice(self.data['classrooms'])
                    
                    # Skip if slot+classroom is already assigned
                    slot_key = (day, hour, classroom)
                    if slot_key in assigned_slots:
                        continue
                    
                    # Assign a teacher (who is free at this time)
                    time_key = (day, hour)
                    available_teachers = [t for t in self.data['teachers'] 
                                        if time_key not in teacher_assignments or 
                                        t not in teacher_assignments[time_key]]
                    
                    # Check if teacher had previous consecutive duty
                    if time_key[0] == day and (day, hour-1) in teacher_assignments:
                        available_teachers = [t for t in available_teachers 
                                           if t not in teacher_assignments.get((day, hour-1), [])]
                    
                    if not available_teachers:
                        continue
                    
                    teacher = random.choice(available_teachers)
                    
                    # Add exam to schedule
                    exam = {
                        'course': course,
                        'day': day,
                        'hour': hour,
                        'classroom': classroom,
                        'teacher': teacher
                    }
                    
                    chromosome.append(exam)
                    assigned = True
                    
                    # Update assigned slots and teachers
                    assigned_slots[slot_key] = course
                    
                    if time_key not in teacher_assignments:
                        teacher_assignments[time_key] = []
                    teacher_assignments[time_key].append(teacher)
            
            # Add chromosome to population
            population.append(chromosome)
        
        return population
    
    def fitness(self, chromosome):
        """Calculate fitness score for a chromosome"""
        # Start with a base fitness score
        fitness_score = 0
        penalty = 0
        
        # Check if all courses are scheduled
        scheduled_courses = [exam['course'] for exam in chromosome]
        if len(scheduled_courses) != len(self.data['course_codes']):
            penalty += 1000  # Severe penalty for missing courses
        
        # Track assigned slots and teachers
        assigned_slots = {}  # (day, hour, classroom) -> course
        teacher_assignments = {}  # (day, hour) -> list of teachers
        student_exams = defaultdict(list)  # student -> list of (day, hour) exams
        
        for exam in chromosome:
            course = exam['course']
            day = exam['day']
            hour = exam['hour']
            classroom = exam['classroom']
            teacher = exam['teacher']
            
            # Track assigned slots
            slot_key = (day, hour, classroom)
            time_key = (day, hour)
            
            # Check for duplicate assignments (same slot+classroom)
            if slot_key in assigned_slots:
                penalty += 100
            assigned_slots[slot_key] = course
            
            # Check for teacher conflicts (same time slot)
            if time_key in teacher_assignments and teacher in teacher_assignments[time_key]:
                penalty += 100
            
            if time_key not in teacher_assignments:
                teacher_assignments[time_key] = []
            teacher_assignments[time_key].append(teacher)
            
            # Check teacher consecutive assignments
            if (day, hour-1) in teacher_assignments and teacher in teacher_assignments[(day, hour-1)]:
                penalty += 50  # Penalty for consecutive duties
            
            # Track student exam times
            if course in self.data['course_enrollments']:
                for student in self.data['course_enrollments'][course]:
                    student_exams[student].append((day, hour))
        
        # Check for student exam conflicts
        for student, exams in student_exams.items():
            # Count exam time slots
            exam_times = Counter(exams)
            for time_slot, count in exam_times.items():
                if count > 1:  # Student has multiple exams at the same time
                    penalty += 100 * (count - 1)
            
            # Check for back-to-back exams
            sorted_exams = sorted(exams, key=lambda x: (self.days.index(x[0]), x[1]))
            for i in range(len(sorted_exams) - 1):
                curr_day, curr_hour = sorted_exams[i]
                next_day, next_hour = sorted_exams[i + 1]
                
                if curr_day == next_day and next_hour - curr_hour == 1:
                    penalty += 5  # Small penalty for back-to-back exams
            
            # Check MG before CS constraint
            if student in self.data['student_courses']:
                mg_courses = [c for c in self.data['student_courses'][student] if c.startswith('MG')]
                cs_courses = [c for c in self.data['student_courses'][student] if c.startswith('CS')]
                
                if mg_courses and cs_courses:
                    mg_times = []
                    cs_times = []
                    
                    for exam in chromosome:
                        if exam['course'] in mg_courses:
                            mg_times.append((self.days.index(exam['day']), exam['hour']))
                        elif exam['course'] in cs_courses:
                            cs_times.append((self.days.index(exam['day']), exam['hour']))
                    
                    if mg_times and cs_times:
                        # Check if any MG exam is after any CS exam
                        for mg_time in mg_times:
                            for cs_time in cs_times:
                                if mg_time > cs_time:  # MG exam after CS exam
                                    penalty += 2
        
        # Check Friday 1 PM common break
        for exam in chromosome:
            if exam['day'] == 'Friday' and exam['hour'] == 13:
                penalty += 20  # Penalty for scheduling during common break
        
        # Check for two-hour faculty meeting slots
        # We need to find two slots where at least half faculty is free
        faculty_free_count = defaultdict(int)
        total_faculty = len(self.data['teachers'])
        half_faculty = total_faculty // 2
        
        for time_key in self.time_slots:
            day, hour = time_key
            busy_faculty = set()
            for exam in chromosome:
                if (exam['day'], exam['hour']) == time_key:
                    busy_faculty.add(exam['teacher'])
            
            free_count = total_faculty - len(busy_faculty)
            faculty_free_count[time_key] = free_count
        
        # Find top slots with most free faculty
        sorted_slots = sorted(faculty_free_count.items(), key=lambda x: x[1], reverse=True)
        faculty_meeting_found = False
        
        if len(sorted_slots) >= 2:
            slot1, free1 = sorted_slots[0]
            slot2, free2 = sorted_slots[1]
            
            if free1 >= half_faculty and free2 >= half_faculty:
                faculty_meeting_found = True
                # Bonus for having faculty meeting slots
                fitness_score += 20
        
        if not faculty_meeting_found:
            penalty += 30
        
        # Calculate final fitness score (higher is better)
        final_fitness = 1000 - penalty + fitness_score
        
        # Ensure non-negative fitness
        return max(0, final_fitness)
    
    def tournament_selection(self, population, k=3):
        """Select a parent using tournament selection"""
        # Randomly select k individuals
        tournament = random.sample(population, k)
        
        # Return the one with the highest fitness
        return max(tournament, key=lambda x: self.fitness(x))
    
    def roulette_wheel_selection(self, population):
        """Select a parent using roulette wheel selection"""
        # Calculate fitness for each individual
        fitnesses = [self.fitness(chromosome) for chromosome in population]
        total_fitness = sum(fitnesses)
        
        # Handle case where all fitnesses are 0
        if total_fitness == 0:
            return random.choice(population)
        
        # Generate a random point on the wheel
        selection_point = random.uniform(0, total_fitness)
        
        # Find the individual at that point
        current = 0
        for i, chromosome in enumerate(population):
            current += fitnesses[i]
            if current >= selection_point:
                return chromosome
        
        # Fallback (should not reach here)
        return population[-1]
    
    def crossover(self, parent1, parent2):
        """Perform crossover between two parents"""
        if random.random() > self.crossover_rate:
            return parent1, parent2
        
        # Create mapping of course to exam details for each parent
        p1_mapping = {exam['course']: exam for exam in parent1}
        p2_mapping = {exam['course']: exam for exam in parent2}
        
        # Single point crossover
        crossover_point = random.randint(1, len(self.data['course_codes']) - 1)
        
        # Create children
        child1 = []
        child2 = []
        
        # Get ordered list of courses
        courses = list(self.data['course_codes'])
        random.shuffle(courses)  # Shuffle to avoid bias
        
        for i, course in enumerate(courses):
            if i < crossover_point:
                # First part of crossover: child1 gets from parent1, child2 gets from parent2
                if course in p1_mapping:
                    child1.append(p1_mapping[course])
                if course in p2_mapping:
                    child2.append(p2_mapping[course])
            else:
                # Second part of crossover: child1 gets from parent2, child2 gets from parent1
                if course in p2_mapping:
                    child1.append(p2_mapping[course])
                if course in p1_mapping:
                    child2.append(p1_mapping[course])
        
        return child1, child2
    
    def mutation(self, chromosome):
        """Perform mutation on a chromosome"""
        if random.random() > self.mutation_rate:
            return chromosome
        
        # Copy chromosome
        mutated = chromosome.copy()
        
        # Select a random exam to mutate
        if not mutated:  # Handle empty chromosome
            return mutated
            
        exam_idx = random.randint(0, len(mutated) - 1)
        exam = mutated[exam_idx]
        
        # Select what to mutate (day, hour, classroom, or teacher)
        mutation_type = random.choice(['day', 'hour', 'classroom', 'teacher'])
        
        if mutation_type == 'day':
            exam['day'] = random.choice(self.days)
        elif mutation_type == 'hour':
            new_hour = random.choice(self.hours)
            # Avoid Friday 1 PM slot
            if exam['day'] == 'Friday' and new_hour == 13:
                new_hour = random.choice([h for h in self.hours if h != 13])
            exam['hour'] = new_hour
        elif mutation_type == 'classroom':
            exam['classroom'] = random.choice(self.data['classrooms'])
        elif mutation_type == 'teacher':
            exam['teacher'] = random.choice(self.data['teachers'])
        
        return mutated
    
    def run(self):
        """Run the genetic algorithm"""
        print("Initializing population...")
        population = self.initialize_population()
        
        # Track the best solution and its fitness
        best_solution = None
        best_fitness = -1
        
        # Track average fitness per generation
        avg_fitness_history = []
        best_fitness_history = []
        
        print("Starting evolution...")
        for generation in range(self.generations):
            # Calculate fitness for all chromosomes
            fitnesses = [self.fitness(chromosome) for chromosome in population]
            avg_fitness = sum(fitnesses) / len(fitnesses)
            max_fitness = max(fitnesses)
            best_idx = fitnesses.index(max_fitness)
            
            # Track best solution
            if max_fitness > best_fitness:
                best_fitness = max_fitness
                best_solution = population[best_idx]
            
            # Print progress
            if generation % 10 == 0:
                print(f"Generation {generation}: Avg Fitness = {avg_fitness:.2f}, Best Fitness = {max_fitness:.2f}")
                # Print top 3 solutions
                top_indices = sorted(range(len(fitnesses)), key=lambda i: fitnesses[i], reverse=True)[:3]
                for i, idx in enumerate(top_indices):
                    print(f"  Individual {i+1}: Fitness = {fitnesses[idx]:.2f}, Exams = {len(population[idx])}")
            
            # Store history
            avg_fitness_history.append(avg_fitness)
            best_fitness_history.append(max_fitness)
            
            # Create new population
            new_population = []
            
            # Elitism: keep the best solution
            new_population.append(population[best_idx])
            
            # Fill the rest with offspring
            while len(new_population) < self.population_size:
                # Select parents
                parent1 = self.roulette_wheel_selection(population)
                parent2 = self.roulette_wheel_selection(population)
                
                # Crossover
                child1, child2 = self.crossover(parent1, parent2)
                
                # Mutation
                child1 = self.mutation(child1)
                child2 = self.mutation(child2)
                
                # Add to new population
                new_population.append(child1)
                if len(new_population) < self.population_size:
                    new_population.append(child2)
            
            # Replace population
            population = new_population
        
        print(f"\nEvolution complete. Best fitness: {best_fitness}")
        
        return best_solution, best_fitness, avg_fitness_history, best_fitness_history
    
    def evaluate_solution(self, solution):
        """Evaluate a solution against all constraints"""
        # Track results
        results = {
            'hard_constraints': {
                'all_courses_scheduled': True,
                'no_student_conflicts': True,
                'weekday_only': True,
                'valid_hours': True,
                'one_teacher_per_slot': True,
                'no_consecutive_duties': True
            },
            'soft_constraints': {
                'friday_break': True,
                'no_back_to_back': False,
                'mg_before_cs': False,
                'faculty_meeting': False
            },
            'violations': {
                'missing_courses': [],
                'student_conflicts': [],
                'invalid_days': [],
                'invalid_hours': [],
                'teacher_conflicts': [],
                'consecutive_duties': [],
                'friday_break_violations': [],
                'back_to_back_count': 0,
                'mg_after_cs_count': 0
            }
        }
        
        # Check if all courses are scheduled
        scheduled_courses = [exam['course'] for exam in solution]
        if len(set(scheduled_courses)) != len(self.data['course_codes']):
            results['hard_constraints']['all_courses_scheduled'] = False
            results['violations']['missing_courses'] = [c for c in self.data['course_codes'] if c not in scheduled_courses]
        
        # Track assigned slots and teachers
        assigned_slots = {}  # (day, hour) -> list of (course, classroom, teacher)
        teacher_assignments = {}  # (day, hour) -> list of teachers
        student_exams = defaultdict(list)  # student -> list of (day, hour, course) exams
        
        for exam in solution:
            course = exam['course']
            day = exam['day']
            hour = exam['hour']
            classroom = exam['classroom']
            teacher = exam['teacher']
            
            # Check day validity
            if day not in self.days:
                results['hard_constraints']['weekday_only'] = False
                results['violations']['invalid_days'].append((course, day))
            
            # Check hour validity
            if hour not in self.hours:
                results['hard_constraints']['valid_hours'] = False
                results['violations']['invalid_hours'].append((course, hour))
            
            # Track assigned slots
            time_key = (day, hour)
            
            if time_key not in assigned_slots:
                assigned_slots[time_key] = []
            assigned_slots[time_key].append((course, classroom, teacher))
            
            # Check for teacher conflicts (same time slot)
            if time_key in teacher_assignments and teacher in teacher_assignments[time_key]:
                results['hard_constraints']['one_teacher_per_slot'] = False
                results['violations']['teacher_conflicts'].append((teacher, day, hour))
            
            if time_key not in teacher_assignments:
                teacher_assignments[time_key] = []
            teacher_assignments[time_key].append(teacher)
            
            # Check teacher consecutive assignments
            prev_hour_key = (day, hour-1)
            if prev_hour_key in teacher_assignments and teacher in teacher_assignments[prev_hour_key]:
                results['hard_constraints']['no_consecutive_duties'] = False
                results['violations']['consecutive_duties'].append((teacher, day, hour-1, hour))
            
            # Track student exam times
            if course in self.data['course_enrollments']:
                for student in self.data['course_enrollments'][course]:
                    student_exams[student].append((day, hour, course))
        
        # Check for student exam conflicts
        for student, exams in student_exams.items():
            # Group exams by time slot
            exam_times = defaultdict(list)
            for day, hour, course in exams:
                exam_times[(day, hour)].append(course)
            
            # Check for conflicts (multiple exams at same time)
            for time_slot, courses in exam_times.items():
                if len(courses) > 1:
                    results['hard_constraints']['no_student_conflicts'] = False
                    results['violations']['student_conflicts'].append((student, time_slot, courses))
            
            # Check for back-to-back exams
            sorted_exams = sorted(exams, key=lambda x: (self.days.index(x[0]), x[1]))
            back_to_back_count = 0
            
            for i in range(len(sorted_exams) - 1):
                curr_day, curr_hour, curr_course = sorted_exams[i]
                next_day, next_hour, next_course = sorted_exams[i + 1]
                
                if curr_day == next_day and next_hour - curr_hour == 1:
                    back_to_back_count += 1
            
            results['violations']['back_to_back_count'] += back_to_back_count
            
            # Check MG before CS constraint
            if student in self.data['student_courses']:
                mg_courses = [c for c in self.data['student_courses'][student] if c.startswith('MG')]
                cs_courses = [c for c in self.data['student_courses'][student] if c.startswith('CS')]
                
                if mg_courses and cs_courses:
                    mg_times = []
                    cs_times = []
                    
                    for exam in solution:
                        if exam['course'] in mg_courses:
                            mg_times.append((self.days.index(exam['day']), exam['hour']))
                        elif exam['course'] in cs_courses:
                            cs_times.append((self.days.index(exam['day']), exam['hour']))
                    
                    if mg_times and cs_times:
                        # Check if any MG exam is after any CS exam
                        mg_after_cs_count = 0
                        for mg_time in mg_times:
                            for cs_time in cs_times:
                                if mg_time > cs_time:  # MG exam after CS exam
                                    mg_after_cs_count += 1
                        
                        results['violations']['mg_after_cs_count'] += mg_after_cs_count
        
        # Check Friday 1 PM common break
        friday_break_violations = []
        for exam in solution:
            if exam['day'] == 'Friday' and exam['hour'] == 13:
                friday_break_violations.append(exam['course'])
        
        if friday_break_violations:
            results['soft_constraints']['friday_break'] = False
            results['violations']['friday_break_violations'] = friday_break_violations
        
        # Check soft constraints fulfillment
        total_students = len(student_exams)
        if results['violations']['back_to_back_count'] <= total_students * 0.1:  # Less than 10% of students have back-to-back
            results['soft_constraints']['no_back_to_back'] = True
        
        if results['violations']['mg_after_cs_count'] == 0:
            results['soft_constraints']['mg_before_cs'] = True
        
        # Check for two-hour faculty meeting slots
        faculty_free_count = defaultdict(int)
        total_faculty = len(self.data['teachers'])
        half_faculty = total_faculty // 2
        
        for time_key in self.time_slots:
            day, hour = time_key
            busy_faculty = set()
            for exam in solution:
                if (exam['day'], exam['hour']) == time_key:
                    busy_faculty.add(exam['teacher'])
            
            free_count = total_faculty - len(busy_faculty)
            faculty_free_count[time_key] = free_count
        
        # Find top slots with most free faculty
        sorted_slots = sorted(faculty_free_count.items(), key=lambda x: x[1], reverse=True)
        
        if len(sorted_slots) >= 2:
            slot1, free1 = sorted_slots[0]
            slot2, free2 = sorted_slots[1]
            
            if free1 >= half_faculty and free2 >= half_faculty:
                results['soft_constraints']['faculty_meeting'] = True
                self.faculty_meeting_slots = [slot1, slot2]
        
        # Count fulfilled constraints
        hard_constraints_fulfilled = sum(results['hard_constraints'].values())
        soft_constraints_fulfilled = sum(results['soft_constraints'].values())
        
        results['summary'] = {
            'hard_constraints_fulfilled': hard_constraints_fulfilled,
            'hard_constraints_total': len(results['hard_constraints']),
            'soft_constraints_fulfilled': soft_constraints_fulfilled,
            'soft_constraints_total': len(results['soft_constraints']),
            'faculty_meeting_slots': self.faculty_meeting_slots
        }
        
        return results
    
    def display_schedule(self, solution):
        """Display the exam schedule in a readable format"""
        if not solution:
            print("No solution to display.")
            return
        
        # Create a list for the schedule
        schedule_data = []
        
        for exam in solution:
            schedule_data.append({
                'Course Code': exam['course'],
                'Day': exam['day'],
                'Hour': f"{exam['hour']}:00",
                'Classroom': exam['classroom'],
                'Teacher': exam['teacher']
            })
        
        # Sort by day and hour
        day_order = {day: i for i, day in enumerate(self.days)}
        schedule_data.sort(key=lambda x: (day_order[x['Day']], x['Hour']))
        
        # Print schedule using tabulate for a nice table format
        print("\n=== EXAM SCHEDULE ===")
        print(tabulate(schedule_data, headers="keys", tablefmt="grid"))
        
        # Generate a schedule matrix view
        print("\n=== SCHEDULE MATRIX ===")
        # Create a matrix for the schedule (days x hours)
        schedule_matrix = {}
        for day in self.days:
            schedule_matrix[day] = {}
            for hour in self.hours:
                schedule_matrix[day][hour] = []
        
        # Fill the matrix with exams
        for exam in solution:
            day = exam['day']
            hour = exam['hour']
            course = exam['course']
            classroom = exam['classroom']
            
            schedule_matrix[day][hour].append(f"{course} ({classroom})")
        
        # Print the matrix
        header = ["Day"] + [f"{hour}:00" for hour in self.hours]
        matrix_rows = []
        
        for day in self.days:
            row = [day]
            for hour in self.hours:
                exams = schedule_matrix[day][hour]
                cell = ", ".join(exams) if exams else "-"
                row.append(cell)
            matrix_rows.append(row)
        
        print(tabulate(matrix_rows, headers=header, tablefmt="grid"))
        
        return schedule_data
    
    def display_statistics(self, solution):
        """Print summary statistics for a schedule and return the raw tallies.

        Reports exams per day and per hour, the five busiest teachers and
        classrooms, faculty availability at every configured meeting slot, and
        how the schedule affects students (clashing exams, back-to-back exams
        and the number of exams each student sits).

        Args:
            solution: List of exam dicts, each providing the keys 'course',
                'day', 'hour', 'classroom' and 'teacher'.

        Returns:
            dict with the keys:
                'exams_by_day', 'exams_by_hour', 'teacher_assignments',
                'classroom_usage': tallies keyed by day / hour / teacher /
                    classroom, starting at zero for every known key.
                'student_conflicts': number of students who have more than
                    one exam in at least one time slot.
                'back_to_back_exams': number of students who have at least
                    one pair of exams one hour apart on the same day.
                'conflict_count': total number of (student, time slot) pairs
                    that hold more than one exam.
                'back_to_back_count': total number of neighbouring exam pairs
                    (in each student's time-ordered list) one hour apart.

        Raises:
            KeyError: If an exam refers to a day, hour, teacher or classroom
                that is not present in the scheduler's own lists.
        """
        teachers = self.data['teachers']

        # Seed every tally with zero so unused days, hours, teachers and
        # classrooms still show up in the report.
        exams_by_day = {day: 0 for day in self.days}
        exams_by_hour = {hour: 0 for hour in self.hours}
        teacher_assignments = {teacher: 0 for teacher in teachers}
        classroom_usage = {classroom: 0 for classroom in self.data['classrooms']}

        # Teachers occupied in each (day, hour) slot, collected in the same
        # pass so the faculty-meeting report need not rescan the schedule.
        busy_by_slot = defaultdict(set)

        for exam in solution:
            exams_by_day[exam['day']] += 1
            exams_by_hour[exam['hour']] += 1
            teacher_assignments[exam['teacher']] += 1
            classroom_usage[exam['classroom']] += 1
            busy_by_slot[(exam['day'], exam['hour'])].add(exam['teacher'])

        # Display stats
        print("\n=== SCHEDULE STATISTICS ===")

        print("\nExams by Day:")
        day_stats = [{"Day": day, "Count": count} for day, count in exams_by_day.items()]
        print(tabulate(day_stats, headers="keys", tablefmt="simple"))

        print("\nExams by Hour:")
        hour_stats = [
            {"Hour": f"{hour}:00", "Count": count}
            for hour, count in sorted(exams_by_hour.items(), key=lambda item: int(item[0]))
        ]
        print(tabulate(hour_stats, headers="keys", tablefmt="simple"))

        print("\nTeacher Workload (Top 5):")
        teacher_stats = [{"Teacher": teacher, "Exams": count} for teacher, count in teacher_assignments.items()]
        teacher_stats.sort(key=lambda row: row["Exams"], reverse=True)
        print(tabulate(teacher_stats[:5], headers="keys", tablefmt="simple"))

        print("\nClassroom Usage (Top 5):")
        classroom_stats = [{"Classroom": room, "Exams": count} for room, count in classroom_usage.items()]
        classroom_stats.sort(key=lambda row: row["Exams"], reverse=True)
        print(tabulate(classroom_stats[:5], headers="keys", tablefmt="simple"))

        # Faculty meeting slots
        if self.faculty_meeting_slots:
            print("\nFaculty Meeting Slots:")
            total_teachers = len(teachers)
            meeting_data = []
            for day, hour in self.faculty_meeting_slots:
                free_count = total_teachers - len(busy_by_slot.get((day, hour), ()))
                # Guard the division the same way the student percentages are
                # guarded below: an empty teacher list reports 0%.
                free_percentage = (free_count / total_teachers) * 100 if total_teachers > 0 else 0

                meeting_data.append({
                    "Day": day,
                    "Time": f"{hour}:00",
                    "Free Faculty": f"{free_count}/{total_teachers} ({free_percentage:.1f}%)"
                })

            print(tabulate(meeting_data, headers="keys", tablefmt="simple"))

        # Add student-centric statistics
        print("\n=== STUDENT IMPACT STATISTICS ===")

        # Collect (day, hour, course) for every exam each student sits.
        enrollments = self.data['course_enrollments']
        student_exams = defaultdict(list)
        for exam in solution:
            course = exam['course']
            day = exam['day']
            hour = exam['hour']
            if course in enrollments:
                for student in enrollments[course]:
                    student_exams[student].append((day, hour, course))

        # Position of each day in the week, looked up once instead of calling
        # list.index() for every comparison key.
        day_order = {day: index for index, day in enumerate(self.days)}

        conflict_count = 0
        conflict_students = set()
        back_to_back_count = 0
        back_to_back_students = set()
        exams_per_student = {}

        for student, exams in student_exams.items():
            exams_per_student[student] = len(exams)

            # A conflict is a time slot holding more than one of the
            # student's exams.
            slot_sizes = Counter((day, hour) for day, hour, _ in exams)
            clashes = sum(1 for size in slot_sizes.values() if size > 1)
            if clashes:
                conflict_count += clashes
                conflict_students.add(student)

            # Back-to-back: neighbours in time order, same day, one hour apart.
            ordered = sorted(exams, key=lambda item: (day_order[item[0]], item[1]))
            adjacent = sum(
                1
                for (curr_day, curr_hour, _), (next_day, next_hour, _) in zip(ordered, ordered[1:])
                if curr_day == next_day and next_hour - curr_hour == 1
            )
            if adjacent:
                back_to_back_count += adjacent
                back_to_back_students.add(student)

        # Print student impact statistics
        total_students = len(student_exams)
        conflict_percentage = (len(conflict_students) / total_students) * 100 if total_students > 0 else 0
        back_to_back_percentage = (len(back_to_back_students) / total_students) * 100 if total_students > 0 else 0

        print(f"\nTotal Students: {total_students}")
        print(f"Students with conflicts: {len(conflict_students)} ({conflict_percentage:.1f}%)")
        print(f"Students with back-to-back exams: {len(back_to_back_students)} ({back_to_back_percentage:.1f}%)")

        # Exams per student distribution
        if exams_per_student:
            exam_counts = Counter(exams_per_student.values())
            print("\nExams per Student Distribution:")
            dist_data = [{"Exams": count, "Students": freq, "Percentage": (freq/total_students)*100}
                         for count, freq in sorted(exam_counts.items())]
            print(tabulate(dist_data, headers="keys", tablefmt="simple"))

        return {
            'exams_by_day': exams_by_day,
            'exams_by_hour': exams_by_hour,
            'teacher_assignments': teacher_assignments,
            'classroom_usage': classroom_usage,
            'student_conflicts': len(conflict_students),
            'back_to_back_exams': len(back_to_back_students),
            # Totals that were previously computed and then thrown away.
            'conflict_count': conflict_count,
            'back_to_back_count': back_to_back_count
        }
    
    def export_schedule_to_csv(self, solution, filename="exam_schedule.csv"):
        """Export the exam schedule to a CSV file"""
        if not solution:
            print("No solution to export.")
            return
        
        # Create a list for the schedule
        schedule_data = []
        
        for exam in solution:
            schedule_data.append([
                exam['course'],
                exam['day'],
                f"{exam['hour']}:00",
                exam['classroom'],
                exam['teacher']
            ])
        
        # Sort by day and hour
        day_order = {day: i for i, day in enumerate(self.days)}
        schedule_data.sort(key=lambda x: (day_order[x[1]], x[2]))
        
        # Add header
        header = ['Course Code', 'Day', 'Time', 'Classroom', 'Teacher']
        schedule_data.insert(0, header)
        
        # Write to CSV
        with open(filename, 'w', newline='') as f:
            writer = pd.DataFrame(schedule_data[1:], columns=schedule_data[0])
            writer.to_csv(f, index=False)
            
        print(f"\nSchedule exported to {filename}")

# 3. Main Function to Run the Scheduler
def main():
    """Run the whole pipeline: load data, evolve a schedule, report, export.

    Prints the constraint evaluation, the timetable and its statistics,
    writes the schedule to 'exam_schedule.csv' and, when matplotlib can be
    imported, saves the fitness curves to 'fitness_evolution.png'.

    Returns:
        tuple: ``(best_solution, stats)`` where ``stats`` is the value
        returned by ``display_statistics`` for the best schedule.
    """
    exam_data = load_data()

    # Genetic-algorithm settings used for this run.
    ga_settings = {
        'population_size': 50,
        'generations': 75,
        'crossover_rate': 0.8,
        'mutation_rate': 0.2,
    }
    scheduler = ExamScheduler(exam_data, **ga_settings)

    # Evolve the population; the best fitness value itself is not used below.
    winner, _winner_fitness, mean_curve, best_curve = scheduler.run()

    evaluation = scheduler.evaluate_solution(winner)

    # Report each constraint group with a tick or a cross per constraint.
    print("\n=== SOLUTION EVALUATION ===")
    constraint_groups = (
        ("\nHard Constraints:", 'hard_constraints'),
        ("\nSoft Constraints:", 'soft_constraints'),
    )
    for heading, group_key in constraint_groups:
        print(heading)
        for constraint, fulfilled in evaluation[group_key].items():
            if fulfilled:
                status = "✓"
            else:
                status = "✗"
            print(f"  {status} {constraint.replace('_', ' ').title()}")

    summary = evaluation['summary']
    print(f"\nFulfilled {summary['hard_constraints_fulfilled']}/{summary['hard_constraints_total']} hard constraints")
    print(f"Fulfilled {summary['soft_constraints_fulfilled']}/{summary['soft_constraints_total']} soft constraints")

    # Timetable, statistics and CSV export, in that order.
    scheduler.display_schedule(winner)
    stats = scheduler.display_statistics(winner)
    scheduler.export_schedule_to_csv(winner, "exam_schedule.csv")

    # Plot fitness history (skipped when matplotlib is not installed).
    try:
        import matplotlib.pyplot as plt

        plt.figure(figsize=(10, 6))
        for curve, label in ((mean_curve, 'Average Fitness'), (best_curve, 'Best Fitness')):
            plt.plot(curve, label=label)
        plt.xlabel('Generation')
        plt.ylabel('Fitness')
        plt.title('Fitness Evolution')
        plt.legend()
        plt.grid(True)
        plt.savefig('fitness_evolution.png')
        plt.close()

        print("\nFitness evolution plot saved as 'fitness_evolution.png'")
    except ImportError:
        print("Matplotlib not available. Skipping fitness plot generation.")

    return winner, stats

# 4. Additional Utility Functions
def analyze_student_impact(data, solution):
    """Report how tightly each student's exams are packed together.

    Prints the students who sit three or more exams on one day, the average
    spread and concentration of exams, and the ten most compact and ten most
    spread-out individual schedules.

    Args:
        data: Loaded data dict; 'course_enrollments' maps a course code to
            the students enrolled in it.
        solution: List of exam dicts with 'course', 'day' and 'hour' keys.

    Returns:
        dict with 'heavy_days' (number of students with 3+ exams on a single
        day) and 'student_spreads' (one record per student who has at least
        two exams, with keys 'student', 'exams', 'days_used', 'spread' and
        'concentration').
    """
    # A scheduler is built only to read its ordered list of days.
    scheduler = ExamScheduler(data)

    # Every (day, hour, course) each student has to sit.
    student_exams = defaultdict(list)
    for entry in solution:
        code = entry['course']
        slot = (entry['day'], entry['hour'], code)

        roster = data['course_enrollments']
        if code in roster:
            for student in roster[code]:
                student_exams[student].append(slot)

    print("\n=== STUDENT EXAM SPACING ANALYSIS ===")

    day_indices = {}
    for position, day_name in enumerate(scheduler.days):
        day_indices[day_name] = position

    # Per-student tally of exams on each day.
    exams_per_day = {
        student: Counter(day for day, _hour, _course in exams)
        for student, exams in student_exams.items()
    }

    # Students whose busiest day holds three or more exams.
    students_with_heavy_days = []
    for student, tally in exams_per_day.items():
        busiest = max(tally.values(), default=0)
        if busiest < 3:
            continue
        crowded = [day for day, count in tally.items() if count >= 3]
        students_with_heavy_days.append((student, crowded, busiest))

    if not students_with_heavy_days:
        print("No students have 3+ exams in a single day.")
    else:
        print(f"\nStudents with 3+ exams in a single day: {len(students_with_heavy_days)}")
        worst_first = sorted(students_with_heavy_days, key=lambda item: item[2], reverse=True)
        heavy_data = [
            {
                "Student": student,
                "Days": ', '.join(crowded),
                "Max Exams in a Day": busiest
            }
            for student, crowded, busiest in worst_first[:10]
        ]
        print(tabulate(heavy_data, headers="keys", tablefmt="simple"))

    # Spread: inclusive number of days from first to last exam.
    # Concentration: exams per distinct day on which the student sits one.
    student_spreads = []
    for student, exams in student_exams.items():
        exam_total = len(exams)
        if exam_total <= 1:
            continue

        day_names = [day for day, _hour, _course in exams]
        distinct_days = len(set(day_names))
        positions = [day_indices[day] for day in day_names]
        span = max(positions) - min(positions) + 1

        if distinct_days:
            density = exam_total / distinct_days
        else:
            density = 0

        student_spreads.append({
            "student": student,
            "exams": exam_total,
            "days_used": distinct_days,
            "spread": span,
            "concentration": density
        })

    def _as_table(records):
        # Convert spread records into printable table rows.
        return [
            {
                "Student": record["student"],
                "Exams": record["exams"],
                "Days Used": record["days_used"],
                "Day Spread": record["spread"],
                "Concentration": f"{record['concentration']:.2f}"
            }
            for record in records
        ]

    if student_spreads:
        population = len(student_spreads)
        avg_spread = sum(record["spread"] for record in student_spreads) / population
        avg_concentration = sum(record["concentration"] for record in student_spreads) / population

        print(f"\nAverage exam spread: {avg_spread:.2f} days")
        print(f"Average exam concentration: {avg_concentration:.2f} exams per day")

        # Smallest spread first; more exams breaks ties.
        tightest = sorted(student_spreads, key=lambda record: (record["spread"], -record["exams"]))
        print("\nStudents with most compact schedules:")
        print(tabulate(_as_table(tightest[:10]), headers="keys", tablefmt="simple"))

        # Largest spread first; more exams breaks ties.
        widest = sorted(student_spreads, key=lambda record: (-record["spread"], -record["exams"]))
        print("\nStudents with most spread out schedules:")
        print(tabulate(_as_table(widest[:10]), headers="keys", tablefmt="simple"))

    return {
        "heavy_days": len(students_with_heavy_days),
        "student_spreads": student_spreads
    }

def optimize_teacher_workload(solution, teachers):
    """Rebalance invigilation duties by moving exams off overloaded teachers.

    With ``avg`` the mean number of exams per teacher, a teacher is
    overloaded when holding more than ``avg + 1`` exams, and underloaded when
    holding fewer than ``avg - 1`` exams or none at all. Exams are visited in
    schedule order; each exam of an overloaded teacher is handed to a randomly
    chosen underloaded teacher who has no other exam in the same (day, hour)
    slot, so a reassignment never double-books the receiving teacher.

    Args:
        solution: List of exam dicts with 'teacher', 'day' and 'hour' keys.
            The 'teacher' values are modified in place.
        teachers: Iterable of all teacher names, including those without any
            exam (they count towards the average and may receive exams).
            May be None, in which case only teachers already present in
            ``solution`` are considered.

    Returns:
        The same ``solution`` list that was passed in.
    """
    print("\n=== TEACHER WORKLOAD OPTIMIZATION ===")

    # Nothing to balance; this also avoids min()/max() on an empty sequence
    # and a division by zero when computing the average.
    if not solution:
        print("No exams to rebalance.")
        return solution

    # Start every known teacher at zero so idle teachers are visible.
    teacher_count = defaultdict(int)
    if teachers is not None:
        for teacher in teachers:
            teacher_count[teacher] = 0
    for exam in solution:
        teacher_count[exam['teacher']] += 1

    # Calculate statistics
    min_load = min(teacher_count.values())
    max_load = max(teacher_count.values())
    avg_load = sum(teacher_count.values()) / len(teacher_count)
    print(f"Initial workload - Min: {min_load}, Max: {max_load}, Avg: {avg_load:.2f}")

    upper = avg_load + 1
    lower = avg_load - 1
    overloaded = {t for t, count in teacher_count.items() if count > upper}
    underloaded = [t for t, count in teacher_count.items() if count == 0 or count < lower]

    # Number of exams each teacher already has in every (day, hour) slot.
    slot_load = Counter((exam['day'], exam['hour'], exam['teacher']) for exam in solution)

    # Try to balance by reassigning exams
    reassignments = 0
    for exam in solution:
        # Stop once either side of the imbalance has been used up.
        if not overloaded or not underloaded:
            break

        old_teacher = exam['teacher']
        if old_teacher not in overloaded:
            continue

        # Only teachers who are free in this slot may take the exam over.
        slot = (exam['day'], exam['hour'])
        candidates = [t for t in underloaded if slot_load[slot + (t,)] == 0]
        if not candidates:
            continue

        new_teacher = random.choice(candidates)
        exam['teacher'] = new_teacher

        # Update counts
        teacher_count[old_teacher] -= 1
        teacher_count[new_teacher] += 1
        slot_load[slot + (old_teacher,)] -= 1
        slot_load[slot + (new_teacher,)] += 1
        reassignments += 1

        # Check if teachers are still over/underloaded
        if teacher_count[old_teacher] <= upper:
            overloaded.discard(old_teacher)
        if teacher_count[new_teacher] >= lower:
            underloaded.remove(new_teacher)

    # Recalculate statistics
    min_load = min(teacher_count.values())
    max_load = max(teacher_count.values())
    avg_load = sum(teacher_count.values()) / len(teacher_count)

    print(f"Made {reassignments} reassignments")
    print(f"Final workload - Min: {min_load}, Max: {max_load}, Avg: {avg_load:.2f}")

    # Display workload distribution
    workload_data = [{"Teacher": t, "Exams": c} for t, c in sorted(teacher_count.items(), key=lambda x: x[1], reverse=True)]
    print("\nFinal Teacher Workload:")
    print(tabulate(workload_data[:10], headers="keys", tablefmt="simple"))

    return solution

# Run the program if executed directly
if __name__ == "__main__":
    # Re-seed both random generators immediately before the run so that
    # executing this block again starts from the same random state.
    random.seed(42)
    np.random.seed(42)
    
    print("Starting Exam Scheduler...")
    best_solution, stats = main()
    
    # Run additional analyses
    print("\n=== ADDITIONAL ANALYSES ===")
    
    # Analyze student impact
    # main() returns only the solution and its statistics, so the input data
    # is loaded a second time here for the follow-up analyses.
    data = load_data()
    student_impact = analyze_student_impact(data, best_solution)
    
    # Optimize teacher workload
    # NOTE(review): optimize_teacher_workload changes exam['teacher'] in place
    # on best_solution, while the CSV was already written inside main(); the
    # file on disk therefore does not include any reassignment made here.
    optimized_solution = optimize_teacher_workload(best_solution, data['teachers'])
    
    print("\nExam scheduling complete!")
    print("The final schedule has been saved to 'exam_schedule.csv'")

Starting Exam Scheduler...
Loaded 26 courses
Loaded 299 student-course enrollments
Loaded 200 students
Loaded 63 teachers
After removing duplicates: 23 unique courses
Initializing population...
Starting evolution...
Generation 0: Avg Fitness = 544.46, Best Fitness = 990.00
  Individual 1: Fitness = 990.00, Exams = 23
  Individual 2: Fitness = 883.00, Exams = 23
  Individual 3: Fitness = 878.00, Exams = 23
Generation 10: Avg Fitness = 568.24, Best Fitness = 959.00
  Individual 1: Fitness = 959.00, Exams = 23
  Individual 2: Fitness = 925.00, Exams = 23
  Individual 3: Fitness = 870.00, Exams = 23
Generation 20: Avg Fitness = 600.00, Best Fitness = 888.00
  Individual 1: Fitness = 888.00, Exams = 23
  Individual 2: Fitness = 873.00, Exams = 23
  Individual 3: Fitness = 871.00, Exams = 23
Generation 30: Avg Fitness = 418.08, Best Fitness = 769.00
  Individual 1: Fitness = 769.00, Exams = 23
  Individual 2: Fitness = 759.00, Exams = 23
  Individual 3: Fitness = 749.00, Exams = 23
Generatio