In [1]:
import pandas as pd
from clingo import Control
import ast
from matching_utils import (
    get_invalid_matches, 
    check_mentor_capacity,
    check_day_conflicts,
    check_multiple_days
)

# Input data: one CSV with students and two separate CSVs with mentors.
students_df = pd.read_csv('../DATASETS/studenten.csv')
mentors_df = pd.read_csv('../DATASETS/mentoren.csv')
mentors2_df = pd.read_csv('../DATASETS/mentorenB.csv')  # Second mentor list

# Ordinal rank for each 'Opleidingsniveau' value; the ASP rules further down
# compare these ranks numerically (mentor rank must exceed student rank).
education_mapping = {
    'Associate': 1,
    'Bachelor': 2, 
    'Master': 3,
    'PhD': 4
}

# Mentors required per matched student; both values are interpolated into
# the ASP program text further down.
n_mentors = 3  # Number of mentors from first list
m_mentors = 2  # Number of mentors from second list

def parse_literal_list(raw):
    """Parse a CSV cell holding a Python list literal into a list.

    Args:
        raw: The cell value. Either a string such as "['maandag', 'dinsdag']",
            an already-parsed list, or anything else (e.g. NaN for an empty
            cell).

    Returns:
        A list. A list passed in is returned unchanged; a string that parses
        to a list is returned parsed; a tuple or set literal is converted to
        a list. Every other input yields an empty list, including strings
        that cannot be parsed and strings that parse to a non-sequence
        literal such as "'maandag'" or "5".
    """
    if isinstance(raw, str):
        try:
            raw = ast.literal_eval(raw)
        except (ValueError, SyntaxError, TypeError, RecursionError):
            # Malformed or pathological literal: treat the cell as empty.
            return []
    if isinstance(raw, list):
        return raw
    if isinstance(raw, (tuple, set, frozenset)):
        return list(raw)
    # Scalars (including a parsed bare string) must not leak out: callers
    # iterate the result and would otherwise walk characters or crash.
    return []

def normalize_token(text):
    """Turn a free-text label into a lowercase, underscore-separated token.

    Non-string input (for example NaN from an empty CSV cell) yields "".
    Every single space becomes one underscore; no other characters change.
    """
    if isinstance(text, str):
        # Splitting on a literal space and re-joining maps each space 1:1.
        return "_".join(text.lower().split(' '))
    return ""

# Build the student records once: a list (iteration order) plus a dict keyed
# by the ASP constant ("s<index>") for lookups when decoding solver output.
students_cache = []
student_lookup = {}
for idx, row in students_df.iterrows():
    student_id = f"s{idx}"
    subject_atom = normalize_token(row['Onderwerp'])
    availability = [day.lower() for day in parse_literal_list(row['Beschikbaarheid'])]
    entry = {
        'id': student_id,
        'education_level': education_mapping[row['Opleidingsniveau']],
        'subject_atom': subject_atom,
        'availability': availability,
        # Original column values, stored under lowercased column names.
        'data': {
            column.lower(): row[column]
            for column in ('Voornaam', 'Achternaam', 'Opleidingsniveau', 'Onderwerp')
        },
    }
    student_lookup[student_id] = entry
    students_cache.append(entry)

def _build_mentor_entry(row, mentor_id, mentor_type, type_label):
    """Build one mentor cache entry from a row of a mentor CSV.

    Args:
        row: A row of a mentor DataFrame (as yielded by ``iterrows``).
        mentor_id: The identifier used for this mentor in the ASP facts.
        mentor_type: Internal type tag ('type1' or 'type2').
        type_label: Human-readable type stored in the entry's 'data' dict.

    Returns:
        A dict with the normalized fields used for fact generation plus a
        'data' sub-dict holding the original display values.
    """
    return {
        'id': mentor_id,
        'mentor_type': mentor_type,
        'education_level': education_mapping[row['Opleidingsniveau']],
        'subjects': [
            normalize_token(subject)
            for subject in parse_literal_list(row['Onderwerpen'])
        ],
        'availability': [
            day.lower() for day in parse_literal_list(row['Beschikbaarheid'])
        ],
        'max_students': row['Max_Studenten'],
        'data': {
            'voornaam': row['Voornaam'],
            'achternaam': row['Achternaam'],
            'opleidingsniveau': row['Opleidingsniveau'],
            'type': type_label,
        },
    }


mentors_cache = []
mentor_lookup = {}
# Both mentor lists share one schema; only the id prefix and the type tags
# differ. The first list is processed before the second, so ordering in
# mentors_cache is: all type1 mentors, then all type2 mentors.
for source_df, id_prefix, mentor_type, type_label in (
    (mentors_df, "m", 'type1', 'Type 1'),
    (mentors2_df, "m2_", 'type2', 'Type 2'),
):
    for idx, row in source_df.iterrows():
        mentor_id = f"{id_prefix}{idx}"
        entry = _build_mentor_entry(row, mentor_id, mentor_type, type_label)
        mentors_cache.append(entry)
        mentor_lookup[mentor_id] = entry

def generate_asp_facts():
    """Render the cached students and mentors as ASP fact text.

    Reads the module-level ``students_cache`` and ``mentors_cache`` and
    returns a newline-joined string of facts: student/mentor declarations,
    education levels, expertise, mentor type and capacity, availability, and
    finally one ``day/1`` fact per distinct day seen, in sorted order.
    """
    lines = []
    seen_days = set()

    def add_availability(entity_id, weekdays):
        # Emit availability facts and remember each day for the day/1 facts.
        for weekday in weekdays:
            lines.append(f"availability({entity_id}, {weekday}).")
            seen_days.add(weekday)

    for s in students_cache:
        sid = s['id']
        lines.extend([
            f"student({sid}).",
            f"education({sid}, {s['education_level']}).",
            f"expertise({sid}, {s['subject_atom']}).",
        ])
        add_availability(sid, s['availability'])

    for m in mentors_cache:
        mid = m['id']
        lines.extend([
            f"mentor({mid}).",
            f"mentor_type({mid}, {m['mentor_type']}).",
            f"education({mid}, {m['education_level']}).",
        ])
        lines.extend(f"expertise({mid}, {topic})." for topic in m['subjects'])
        lines.append(f"max_students({mid}, {m['max_students']}).")
        add_availability(mid, m['availability'])

    lines.extend(f"day({weekday})." for weekday in sorted(seen_days))

    return "\n".join(lines)

# Render all students/mentors as ASP facts once; the debug cell reuses this text.
asp_facts = generate_asp_facts()

# Configure the solver. The program below contains a #maximize statement, so
# solving is an optimization run over the number of selected students.
ctl = Control()

# NOTE: this is an f-string, so literal ASP braces are written as {{ }} and
# {n_mentors}/{m_mentors}/{asp_facts} are substituted from Python. The
# "exactly 3 type1 and 2 type2" wording inside the ASP comments reflects the
# current values of n_mentors and m_mentors; the rules use the variables.
ctl.add("base", [], f"""
% Facts from Python
{asp_facts}

% Candidate mentor-student-day triples
candidate(S, M, Day) :-
    student(S),
    mentor(M),
    expertise(S, Subj),
    expertise(M, Subj),
    education(S, ES),
    education(M, EM),
    EM > ES,
    availability(S, Day),
    availability(M, Day).

% Choose which students to match (0 or 1 day per student)
{{ selected(S) }} :- student(S).

% If selected, choose exactly one day
1 {{ match_day(S, Day) : day(Day), availability(S, Day) }} 1 :- selected(S).

% Only allow days where student has enough candidates
:- match_day(S, Day), #count {{ M : candidate(S,M,Day), mentor_type(M,type1) }} < {n_mentors}.
:- match_day(S, Day), #count {{ M : candidate(S,M,Day), mentor_type(M,type2) }} < {m_mentors}.

% Choose exactly 3 type1 and 2 type2 mentors for each matched day
{n_mentors} {{ match(S, M, Day) : candidate(S,M,Day), mentor_type(M,type1) }} {n_mentors} :- match_day(S, Day).
{m_mentors} {{ match(S, M, Day) : candidate(S,M,Day), mentor_type(M,type2) }} {m_mentors} :- match_day(S, Day).

% Respect mentor capacities
:- mentor(M), max_students(M, Max), #count {{ S, Day : match(S, M, Day) }} > Max.

% Maximize selected students
#maximize {{ 1,S : selected(S) }}.

#show match/3.

""")

# Ground once at module level; make_matches() solves on this shared control
# object each time it is called.
ctl.ground([("base", [])])

def make_matches(timeout_seconds=120):
    """Solve the grounded program and return the best assignment found.

    The solve runs asynchronously on the module-level ``ctl``; after
    ``timeout_seconds`` (or earlier, if the search finishes) it is cancelled
    and whatever model had the lowest cost so far is returned.

    Args:
        timeout_seconds: Maximum time to wait for the solver, in seconds.

    Returns:
        A list of ``(student_data, mentors_type1, mentors_type2, day)``
        tuples, one per (student, day) pair in the best model. The first
        three items are copies of the cached 'data' dicts; ``day`` is the
        day constant's name. Empty if no model was found.
    """
    matches = []
    best_cost = None

    def collect_matches(model):
        # Called by clingo for every model; keeps only the cheapest one seen.
        nonlocal best_cost

        by_student_day = {}
        for atom in model.symbols(shown=True):
            if atom.name != "match":
                continue
            args = atom.arguments
            s_id = args[0].name
            m_id = args[1].name
            day_name = args[2].name

            buckets = by_student_day.setdefault(
                (s_id, day_name), {'type1': [], 'type2': []}
            )
            buckets[mentor_lookup[m_id]['mentor_type']].append(m_id)

        # Mentor ids are sorted so the output order is stable per student.
        snapshot = [
            (
                student_lookup[s_id]['data'].copy(),
                [mentor_lookup[m]['data'].copy() for m in sorted(buckets['type1'])],
                [mentor_lookup[m]['data'].copy() for m in sorted(buckets['type2'])],
                day_name,
            )
            for (s_id, day_name), buckets in by_student_day.items()
        ]

        # Update best solution if this is better
        current_cost = model.cost
        if best_cost is None or current_cost < best_cost:
            best_cost = current_cost
            matches[:] = snapshot
            print(f"Found solution with {len(matches)} matched students (cost: {current_cost})")

    print(f"Solving with {timeout_seconds}s timeout (will return best solution found)...")

    # Async solve with timeout: wait, then cancel whatever is still running.
    with ctl.solve(on_model=collect_matches, async_=True) as handle:
        handle.wait(timeout_seconds)
        handle.cancel()
        result = handle.get()

    # DEBUG: Check solve result
    print(f"\nSolve result: {result}")
    print(f"Satisfiable: {result.satisfiable}")
    print(f"Interrupted: {result.interrupted}")
    print(f"Exhausted: {result.exhausted}")

    if result.interrupted:
        summary = f"⏳ Timeout - returning best solution found: {len(matches)} students"
    elif result.unsatisfiable:
        summary = "❌ UNSATISFIABLE - No valid solution exists!"
    elif result.exhausted:
        summary = f"✅ Search complete - optimal solution: {len(matches)} students"
    else:
        summary = f"✅ Found solution: {len(matches)} students"
    print(summary)

    return matches

# Usage: run the solver once (waiting at most 120 s) and keep the best
# assignment in `matches`; the export further down consumes it.
matches = make_matches(timeout_seconds=120)
print(f"\nTotal matched students: {len(matches)} (each with {n_mentors} type1 mentors and {m_mentors} type2 mentors)")

def export_matches_to_csv(matches, filename='../DATASETS/matches.csv'):
    """Write the matches to a CSV file, one row per matched student.

    Args:
        matches: Iterable of ``(student, mentors_type1, mentors_type2, day)``
            tuples where the people are dicts with 'voornaam' and
            'achternaam' keys and ``day`` is a string.
        filename: Destination path of the CSV file.

    Returns:
        The exported DataFrame (columns Student, Day, Mentors_Type1,
        Mentors_Type2), or None when ``matches`` is empty, in which case
        nothing is written.
    """
    if not matches:
        print("No matches to export")
        return None

    def full_names(people):
        # "First Last" for each person, joined with "; ".
        return "; ".join(f"{p['voornaam']} {p['achternaam']}" for p in people)

    matches_df = pd.DataFrame([
        {
            'Student': f"{student['voornaam']} {student['achternaam']}",
            'Day': day.capitalize(),
            'Mentors_Type1': full_names(mentors_type1),
            'Mentors_Type2': full_names(mentors_type2),
        }
        for student, mentors_type1, mentors_type2, day in matches
    ])
    matches_df.to_csv(filename, index=False)
    print(f"Exported {len(matches)} matches to {filename}")
    return matches_df

# Export the matches to the default CSV path.
matches_df = export_matches_to_csv(matches)

# export_matches_to_csv returns None when there was nothing to export.
if matches_df is not None:
    print("\nPreview of exported matches:")
    print(matches_df.head())


#models not 0: optimality of last model not guaranteed.


Solving with 120s timeout (will return best solution found)...
Found solution with 0 matched students (cost: [0])
Found solution with 1 matched students (cost: [-1])
Found solution with 2 matched students (cost: [-2])
Found solution with 3 matched students (cost: [-3])
Found solution with 4 matched students (cost: [-4])
Found solution with 5 matched students (cost: [-5])
Found solution with 6 matched students (cost: [-6])
Found solution with 7 matched students (cost: [-7])
Found solution with 8 matched students (cost: [-8])
Found solution with 9 matched students (cost: [-9])
Found solution with 10 matched students (cost: [-10])
Found solution with 11 matched students (cost: [-11])
Found solution with 12 matched students (cost: [-12])
Found solution with 13 matched students (cost: [-13])
Found solution with 14 matched students (cost: [-14])
Found solution with 15 matched students (cost: [-15])
Found solution with 16 matched students (cost: [-16])
Found solution with 17 matched students 

In [2]:
# Debug cell (meant to sit right after the diagnose_matching_problem() cell): inspect the candidate and eligible-day atoms the ASP solver derives.

def debug_asp_candidates():
    """Print which candidate and eligible-day atoms the ASP facts imply.

    Builds a throw-away clingo program from the module-level ``asp_facts``
    containing only the ``candidate/3`` rule and an ``eligible_day/2`` rule,
    solves it, and prints summary counts plus a few samples. A day is
    eligible for a student when at least ``n_mentors`` type1 and
    ``m_mentors`` type2 candidates exist on it, the same thresholds the main
    program enforces. Returns nothing; output goes to stdout.
    """
    from collections import Counter

    print("=== ASP Candidate Debug ===\n")

    # Create a test solver to check what candidates exist. This is an
    # f-string: ASP braces are doubled, Python values use single braces.
    test_ctl = Control()
    test_ctl.add("base", [], f"""
    {asp_facts}
    
    candidate(S, M, Day) :-
        student(S),
        mentor(M),
        expertise(S, Subj),
        expertise(M, Subj),
        education(S, ES),
        education(M, EM),
        EM > ES,
        availability(S, Day),
        availability(M, Day).
    
    eligible_day(S, Day) :-
        student(S),
        day(Day),
        #count {{ M : candidate(S,M,Day), mentor_type(M,type1) }} >= {n_mentors},
        #count {{ M : candidate(S,M,Day), mentor_type(M,type2) }} >= {m_mentors}.
    
    #show candidate/3.
    #show eligible_day/2.
    """)

    test_ctl.ground([("base", [])])

    candidates = []
    eligible_days = []

    with test_ctl.solve(yield_=True) as handle:
        for model in handle:
            for symbol in model.symbols(shown=True):
                if symbol.name == "candidate":
                    s = symbol.arguments[0].name
                    m = symbol.arguments[1].name
                    d = symbol.arguments[2].name
                    candidates.append((s, m, d))
                elif symbol.name == "eligible_day":
                    s = symbol.arguments[0].name
                    d = symbol.arguments[1].name
                    eligible_days.append((s, d))
            break  # Only need one model

    print(f"Total candidates found: {len(candidates)}")
    print(f"Total eligible_day tuples: {len(eligible_days)}")

    if candidates:
        print("\nFirst 10 candidates:")
        for s, m, d in candidates[:10]:
            student = student_lookup[s]
            mentor = mentor_lookup[m]
            print(f"  {student['data']['voornaam']} -> {mentor['data']['voornaam']} ({mentor['mentor_type']}) on {d}")

    if eligible_days:
        print(f"\n✅ Found {len(eligible_days)} eligible student-day pairs!")
        for s, d in eligible_days[:5]:
            student = student_lookup[s]
            print(f"  {student['data']['voornaam']} can be matched on {d}")
    else:
        print("\n❌ NO ELIGIBLE DAYS FOUND!")
        print(f"This means no student has {n_mentors}+ type1 AND {m_mentors}+ type2 candidates on the same day.")

        # Tally candidates per (student, day, mentor type) in one pass so the
        # breakdown below is a lookup instead of a rescan per student/day.
        type_counts = Counter(
            (s, d, mentor_lookup[m]['mentor_type']) for s, m, d in candidates
        )

        # Check per-student breakdown (first five students only)
        print("\nPer-student breakdown:")
        for student in students_cache[:5]:
            for day in student['availability']:
                type1_count = type_counts[(student['id'], day, 'type1')]
                type2_count = type_counts[(student['id'], day, 'type2')]

                if type1_count > 0 or type2_count > 0:
                    print(f"  {student['data']['voornaam']} on {day}: {type1_count} type1, {type2_count} type2")

# Run the candidate/eligible-day debug report against the generated facts.
debug_asp_candidates()

=== ASP Candidate Debug ===

Total candidates found: 12279
Total eligible_day tuples: 396

First 10 candidates:
  Micha -> Nout (type1) on donderdag
  Micha -> Ryan (type1) on maandag
  Micha -> Ryan (type1) on donderdag
  Micha -> Helena (type1) on maandag
  Micha -> Helena (type1) on donderdag
  Micha -> Helena (type1) on zaterdag
  Micha -> Jasmijn (type1) on maandag
  Micha -> Gijs (type1) on woensdag
  Micha -> Gijs (type1) on donderdag
  Micha -> Gijs (type1) on zaterdag

✅ Found 396 eligible student-day pairs!
  Tijs can be matched on dinsdag
  Niek can be matched on dinsdag
  Tim can be matched on dinsdag
  Kyara can be matched on dinsdag
  Noortje can be matched on dinsdag
Total candidates found: 12279
Total eligible_day tuples: 396

First 10 candidates:
  Micha -> Nout (type1) on donderdag
  Micha -> Ryan (type1) on maandag
  Micha -> Ryan (type1) on donderdag
  Micha -> Helena (type1) on maandag
  Micha -> Helena (type1) on donderdag
  Micha -> Helena (type1) on zaterdag
  M

In [3]:
def diagnose_matching_problem():
    """Print a report explaining which students cannot be matched, and why.

    Uses the module-level ``students_cache`` and ``mentors_cache``. After
    printing sample records, it checks every student against every mentor
    with the same conditions the solver uses (mentor level strictly higher,
    subject covered, at least one shared day) and flags each student for
    whom no single day offers ``n_mentors`` type1 and ``m_mentors`` type2
    candidates. Returns nothing; output goes to stdout.
    """
    print("=== Diagnostic Information ===\n")

    # Check students
    print(f"Total students: {len(students_cache)}")
    if students_cache:
        sample = students_cache[0]
        print(f"Sample student: {sample['data']['voornaam']} {sample['data']['achternaam']}")
        print(f"  - Subject: {sample['subject_atom']}")
        print(f"  - Level: {sample['education_level']}")
        print(f"  - Availability: {sample['availability']}")

    # Check mentors by type
    type1_mentors = [m for m in mentors_cache if m['mentor_type'] == 'type1']
    type2_mentors = [m for m in mentors_cache if m['mentor_type'] == 'type2']

    print(f"\nTotal type1 mentors: {len(type1_mentors)}")
    print(f"Total type2 mentors: {len(type2_mentors)}")

    for label, group in (('type1', type1_mentors), ('type2', type2_mentors)):
        if group:
            sample = group[0]
            print(f"\nSample {label} mentor: {sample['data']['voornaam']} {sample['data']['achternaam']}")
            print(f"  - Subjects: {sample['subjects']}")
            print(f"  - Level: {sample['education_level']}")
            print(f"  - Availability: {sample['availability']}")
            print(f"  - Max students: {sample['max_students']}")

    # Check for potential candidates
    print("\n=== Checking Candidate Availability ===")

    candidates_type1 = 0
    candidates_type2 = 0

    for student in students_cache:
        student_days = set(student['availability'])
        found_type1 = 0
        found_type2 = 0
        # day -> [type1 candidates, type2 candidates] available on that day
        per_day = {day: [0, 0] for day in student_days}

        for mentor in mentors_cache:
            # Same basic matching conditions as the solver's candidate rule.
            if mentor['education_level'] <= student['education_level']:
                continue
            if student['subject_atom'] not in mentor['subjects']:
                continue
            common_days = student_days & set(mentor['availability'])
            if not common_days:
                continue

            is_type1 = mentor['mentor_type'] == 'type1'
            if is_type1:
                found_type1 += 1
            else:
                found_type2 += 1
            for day in common_days:
                per_day[day][0 if is_type1 else 1] += 1

        candidates_type1 += found_type1
        candidates_type2 += found_type2

        # The solver needs all required mentors on ONE day, so overall totals
        # alone are not enough: look for a day that satisfies both minimums.
        has_feasible_day = any(
            on_day_type1 >= n_mentors and on_day_type2 >= m_mentors
            for on_day_type1, on_day_type2 in per_day.values()
        )

        if not has_feasible_day:
            print(f"\n[ISSUE] Student {student['data']['voornaam']} {student['data']['achternaam']}:")
            print(f"  - Needs {n_mentors} type1 mentors, found {found_type1} candidates")
            print(f"  - Needs {m_mentors} type2 mentors, found {found_type2} candidates")
            if found_type1 >= n_mentors and found_type2 >= m_mentors:
                print(f"  - No single day offers {n_mentors} type1 and {m_mentors} type2 candidates together")
            if found_type1 + found_type2 > 0:
                print(f"  - Subject: {student['subject_atom']}, Level: {student['education_level']}")

    print(f"\nTotal type1 candidate matches possible: {candidates_type1}")
    print(f"Total type2 candidate matches possible: {candidates_type2}")
    print(f"\nRequired per student: {n_mentors} type1 + {m_mentors} type2 = {n_mentors + m_mentors} total")

# Run diagnostics on the cached students and mentors (prints to stdout).
diagnose_matching_problem()

=== Diagnostic Information ===

Total students: 100
Sample student: Micha Claesdr
  - Subject: creative_digital_innovation
  - Level: 2
  - Availability: ['maandag', 'woensdag', 'donderdag', 'zaterdag']

Total type1 mentors: 100
Total type2 mentors: 100

Sample type1 mentor: Xavi van Wickerode
  - Subjects: ['ethical_hacking', 'data_visualisation', 'software_architecture', 'business_process_analytics']
  - Level: 3
  - Availability: ['maandag', 'woensdag', 'donderdag', 'zondag']
  - Max students: 5

Sample type2 mentor: Noah WolfswinkelB
  - Subjects: ['creative_digital_innovation', 'ethical_hacking', 'data_visualisation', 'data_science']
  - Level: 2
  - Availability: ['maandag', 'dinsdag', 'zondag']
  - Max students: 4

=== Checking Candidate Availability ===

Total type1 candidate matches possible: 2263
Total type2 candidate matches possible: 2374

Required per student: 3 type1 + 2 type2 = 5 total


In [4]:
def _flatten_matches_for_validation(matches):
    """Expand grouped matches into (student, mentor, day) triples.

    Each student/mentor display dict is enriched with the fields taken from
    the cache entries ('beschikbaarheid', and for mentors also 'onderwerpen'
    and 'max_studenten'). Cache entries are resolved by name; mentors are
    additionally keyed by their 'type' label so that equally named mentors
    from the two lists are not confused.

    Returns:
        ``(flat_matches, unresolved)`` where ``unresolved`` counts the
        students and mentors that could not be found in the caches and were
        therefore left out.
    """
    # setdefault keeps the first cache entry for a key, mirroring a
    # first-match scan, while making each lookup O(1).
    students_by_name = {}
    for entry in students_cache:
        data = entry['data']
        students_by_name.setdefault((data['voornaam'], data['achternaam']), entry)

    mentors_by_name = {}
    for entry in mentors_cache:
        data = entry['data']
        mentors_by_name.setdefault(
            (data['voornaam'], data['achternaam'], data.get('type')), entry
        )

    flat_matches = []
    unresolved = 0
    for student, mentors_type1, mentors_type2, day in matches:
        student_entry = students_by_name.get((student['voornaam'], student['achternaam']))
        if student_entry is None:
            unresolved += 1
            continue

        # Reconstruct full student data for validation
        full_student = student.copy()
        full_student['beschikbaarheid'] = student_entry['availability']

        for mentor in (mentors_type1 + mentors_type2):
            mentor_entry = mentors_by_name.get(
                (mentor['voornaam'], mentor['achternaam'], mentor.get('type'))
            )
            if mentor_entry is None:
                unresolved += 1
                continue

            # Reconstruct full mentor data for validation
            full_mentor = mentor.copy()
            full_mentor['onderwerpen'] = mentor_entry['subjects']  # Add subjects list
            full_mentor['beschikbaarheid'] = mentor_entry['availability']  # Add availability
            full_mentor['max_studenten'] = mentor_entry['max_students']  # Add max students
            flat_matches.append((full_student, full_mentor, day))

    return flat_matches, unresolved


def debug_matching():
    """Re-run the solver, print sample matches, and validate the result.

    Calls ``make_matches()`` with its default timeout, prints up to five
    matches, then runs the four ``matching_utils`` checks on the flattened
    (student, mentor, day) triples and prints a PASS/FAIL line for each.
    Returns nothing; output goes to stdout.
    """
    matches = make_matches()
    print(f"Total matches found: {len(matches)}")

    if matches:
        print("\nSample matches:")
        for student, mentors_type1, mentors_type2, day in matches[:5]:
            print(f"  {student['voornaam']} {student['achternaam']} ({student['opleidingsniveau']}, {student['onderwerp']})")
            print(f"  Day: {day.capitalize()}")
            print(f"  Type 1 Mentors:")
            for mentor in mentors_type1:
                print(f"    -> {mentor['voornaam']} {mentor['achternaam']} ({mentor['opleidingsniveau']})")
            print(f"  Type 2 Mentors:")
            for mentor in mentors_type2:
                print(f"    -> {mentor['voornaam']} {mentor['achternaam']} ({mentor['opleidingsniveau']})")
            print()

    # Flatten matches for validation - need to get full student and mentor data
    flat_matches, unresolved = _flatten_matches_for_validation(matches)

    print("\n=== Validation Results ===")

    if unresolved:
        # Previously such records vanished silently, which could make the
        # checks below pass on incomplete data.
        print(f"[WARN] {unresolved} matched record(s) could not be resolved in the caches and were skipped")

    invalid_matches = get_invalid_matches(flat_matches)
    if invalid_matches:
        print(f"[FAIL] Invalid matches: {len(invalid_matches)}")
        for student, mentor, day in invalid_matches[:3]:
            print(f"   - {student['voornaam']} {student['achternaam']} -> {mentor['voornaam']} {mentor['achternaam']} on {day}")
    else:
        print("[PASS] All matches are valid")

    over_capacity = check_mentor_capacity(flat_matches)
    if over_capacity:
        print(f"[FAIL] Mentors over capacity:")
        for item in over_capacity:
            print(f"   - {item['mentor']}: {item['matched']} matched (max: {item['max']})")
    else:
        print("[PASS] All mentors within capacity")

    day_conflicts = check_day_conflicts(flat_matches)
    if day_conflicts:
        print(f"[FAIL] Day conflicts found:")
        for conflict in day_conflicts:
            print(f"   - {conflict['type'].capitalize()} {conflict['name']} has {conflict['count']} matches on {conflict['day']}")
    else:
        print("[PASS] No day conflicts")

    multiple_days = check_multiple_days(flat_matches)
    if multiple_days:
        print(f"[FAIL] Students matched on multiple days:")
        for item in multiple_days:
            print(f"   - {item['student']}: {', '.join(item['days'])}")
    else:
        print("[PASS] No students matched on multiple days")

# Re-run the solver and validate the resulting matches (prints to stdout).
debug_matching()


Solving with 120s timeout (will return best solution found)...
Found solution with 100 matched students (cost: [-100])

Solve result: SAT
Satisfiable: True
Interrupted: False
Exhausted: True
✅ Search complete - optimal solution: 100 students
Total matches found: 100

Sample matches:
  Bram Marceron (Associate, Data Visualisation)
  Day: Donderdag
  Type 1 Mentors:
    -> Xavi van Wickerode (Master)
    -> Lucy Hulskes (Bachelor)
    -> Chris van Landen (PhD)
  Type 2 Mentors:
    -> Maryam KortB (PhD)
    -> Lois MaasB (Bachelor)

  Lisa de Swart (Bachelor, Software Architecture)
  Day: Donderdag
  Type 1 Mentors:
    -> Xavi van Wickerode (Master)
    -> Jill van de Velden (PhD)
    -> Jill Jans (Master)
  Type 2 Mentors:
    -> Gijs RijksB (PhD)
    -> Jasmijn van BeaumontB (Master)

  Eva Simonis (Bachelor, Software Architecture)
  Day: Donderdag
  Type 1 Mentors:
    -> Xavi van Wickerode (Master)
    -> Nora Köster (Master)
    -> Roos van Ginkel (Master)
  Type 2 Mentors:
    -> 