In [1]:
import pandas as pd
import model
# import openpyxl

In [2]:
ls

Integer_Programming.pdf  [34m__pycache__[m[m/             main.ipynb
README.md                [34mdata[m[m/                    model.py


In [3]:
# Read data

data_mentor = pd.read_csv('data/data-mentors.csv', encoding='utf-8')
data_student = pd.read_csv('data/data-students.csv', encoding='utf-8')
data_pairing = pd.read_csv('data/data-pairings.csv', encoding = 'utf-8')

In [4]:
# See the first few rows of the data

data_student.tail(20)
# data_mentor.head()
# data_pairing.head()

Unnamed: 0,Name,Gender,University,Department,Class,Reference,Set Theory/Model Theory,Category Theory,Real Analysis/Functional Analysis,Complex Analysis,...,Statistics,Computer Algebra,Data Science,Cryptography/Coding theory,Mathematical Modeling,Classical Mechanics,Complex Systems/Dynamical Systems,Quantum Theory,Relativity and Quantum Relativistic Theories,Statistical Mechanics
65,Student66,Male,ITU,Mathematics,2,0,,,,,...,Second Choice,,First Choice,,,,,,,
66,Student67,Female,ODTU,Physics,5,12,,,,,...,Third Choice,,First Choice,,,Second Choice,,,,
67,Student68,Female,ODTU,Mathematics,2,11,,,Third Choice,,...,Second Choice,,First Choice,,,,,,,
68,Student69,Male,GTU,Mathematics,4,11,,,,,...,,,,,,,,,,
69,Student70,Female,IYTE,Mathematics,5,3,,,,Second Choice,...,,,First Choice,,,,,,,
70,Student71,Male,ODTU,Physics,2,12,,,,,...,,,,,,,,First Choice,Second Choice,Third Choice
71,Student72,Male,Bogazici,Mathematics,1,12,Third Choice,,,,...,,,,,,,,,,
72,Student73,Female,Atilim,Mathematics,4,11,,,First Choice,Second Choice,...,,,,,,,,,Third Choice,
73,Student74,Male,ODTU,Mathematics,5,12,,Second Choice,,,...,,,Third Choice,,,,,,,
74,Student75,Male,ODTU,Physics,3,9,,,,,...,,,,,,,,,,


# Model 1: Define Required Sets

    1. List of all universities
    2. List of subjects
    3. A dictionary for universites - key: university name, value: list of students from that university
    4. Two dictionaries for interest areas (one for students, one for mentors) - key: student name, value: a list of his/her ordered choices
    5. A dictionary for the score of assigning mentee i to mentor j - key: pairs (student, mentee), value: an integer score of matching a mentor with a mentee


Note: If a mentee (or mentor) has only one interest, say only choice 1, then leave his/her choices 2 and 3 empty, and assign a score to him/her by using only choice 1. Since some mentors have a fourth choice, all mentees will get zero for their fourth choices.

In [5]:
# The required set (1)

student_uni = data_student['University'].unique()    

# The required set (2)

subjects = [col for col in data_student.columns[6:36]]  

# The required set (3)

# data_student['University'].value_counts()

uni_students = data_student.groupby('University')['Name'].apply(list).to_dict()

In [6]:
# The required set (4)

choice_mentee = {}  
choice_mentor = {}
wrong_mentee=[]
wrong_mentor = []

for index,row in data_student.iterrows():
    choice_mentee[row['Name']] = {'First Choice':0, 'Second Choice':0, 'Third Choice':0}
    for subject in subjects:
        if pd.isnull(row[subject]) == False:
            interest_list = str(row[subject]).split(',')
          #  print(interest_list)

            for word in interest_list:
                if word in ['First Choice', 'Second Choice', 'Third Choice']:
                    choice_mentee[row['Name']][word] = subject
                else:   
                    wrong_mentee.append(word)
                   # print(row)


for index,row in data_mentor.iterrows():
    choice_mentor[row['Name']] = {'First Choice':0, 'Second Choice':0, 'Third Choice':0, 'Fourth Choice':0}
    for subject in subjects:
        if pd.isnull(row[subject]) == False:
            interest_list = str(row[subject]).split(',')

            for word in interest_list:
                if word in ['First Choice', 'Second Choice', 'Third Choice', 'Fourth Choice']:
                    choice_mentor[row['Name']][word] = subject
                else:   
                    wrong_mentor.append(word)
                   # print(row)
                 

In [7]:
# The required set (5)

weights = {'First Choice':8, 'Second Choice':6, 'Third Choice':4, 'Fourth Choice':2}

# Define score function

def score(student, mentor):
    score = 0
    for choice1 in choice_mentee[student].keys():
            for choice2 in choice_mentor[mentor].keys():
                if choice_mentee[student][choice1] == choice_mentor[mentor][choice2]:
                    score += weights[choice1]*weights[choice2]
    return score

# Define the scores dictionary

scores = {(mentee, mentor):0 for mentor in choice_mentor.keys() for mentee in choice_mentee.keys()}

for student in choice_mentee.keys():
    for mentor in choice_mentor.keys():
        scores[(student,mentor)] = score(student,mentor)

# Model 1: Trials

In [8]:
max_student = 1
uni_capacity = 10
time = 300

obj_value, solution, df = model.matching_one(choice_mentee, choice_mentor, uni_students, scores, max_student, uni_capacity, time)
print(obj_value)
df.head()

3900.0


Unnamed: 0,Mentor,Student,Score
0,Mentor1,Student18,88
1,Mentor2,Student69,48
2,Mentor3,Student42,48
3,Mentor4,Student26,64
4,Mentor5,Student38,112


In [9]:
# Score of the 2022 pairing

score_2022 = 0

for index, row in data_pairing.iterrows():
    score_2022 += score(row['Student'],row['Mentor'])
    
score_2022

2564

# Model 2: Define Additional Sets 

    1. mentee_high: list of mentee names at high levels (example: junior and senior applicants)

    2. mentor_high: list of mentor names (example: PhD candidates)

    3. gender: dictionary. key: mentee name, value: 1 for girls, 0 for boys

    4. ref_score: dictionary. key: mentee name, value: reference score (allowed to be zero)

In [10]:
#  The required additional set (1)

mentee_high = list(data_student[data_student['Class'] >= 3])


# The required additional set (2)

mentor_high = list(data_mentor[data_mentor['Position_numeric'] >= 2])

# The required additional set (3)

gender = {}

for index,row in data_student.iterrows():
    if row['Gender'] == 'Male':
        gender[row['Name']] = 0
    else: gender[row['Name']] = 1
        

# The required additional set (4)


ref_score = data_student.set_index('Name')['Reference'].to_dict()



In [11]:
type(mentee_high)

list

# Model 2: Trials

In [None]:
max_student = 1
uni_capacity = 10
time = 300

"Set the following parameters"
weight_subject = 1  # positive integer
weight_class = 1  # positive integer
weight_ref = 10  # positive integer
epsilon = 0.4   # number from the interval (0, 1/2). Example: assign 40/100 for having at least 40% women 
min_ref = 8   # define a minimum reference score from a predefined range of ref scores. Any mentee with a ref score below that number will not be assigned.

obj_value_2, solution_2, df_2  = model.matching_two(choice_mentee, choice_mentor, uni_students, mentee_high, mentor_high, scores, 
                                                gender, ref_score, max_student, uni_capacity, time, epsilon, min_ref, weight_subject, weight_class, weight_ref)

# mentor_high ve mentee_high hatali. Asagidaki pairleri iceriyor, icermemeli.

In [19]:
for i in mentee_high:
    for j in mentor_high:
        if (i, j) not in scores:
            print(f"Missing score for pair: ({i}, {j})")
        if i not in ref_score:
            print(f"Missing ref_score for mentee: {i}")


Missing score for pair: (Name, Name)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Gender)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Affiliation)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Position)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Position_numeric)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Areas of interest:)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Set Theory/Model Theory)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Category Theory)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Real Analysis/Functional Analysis)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Complex Analysis)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Harmonic Analysis)
Missing ref_score for mentee: Name
Missing score for pair: (Name, Partial Differential Equations)
Missing ref_score for 

# Mentee ve mentor listesi degil bunlar. Yazdirip gorelim:

In [20]:
mentor_high

['Name',
 'Gender',
 'Affiliation',
 'Position',
 'Position_numeric',
 'Areas of interest:',
 'Set Theory/Model Theory',
 'Category Theory',
 'Real Analysis/Functional Analysis',
 'Complex Analysis',
 'Harmonic Analysis',
 'Partial Differential Equations',
 'Numerical Analysis',
 'Group Theory',
 'Representation Theory',
 'Rings and Algebra',
 'Algebraic Number Theory',
 'Analytic Number Theory',
 'Combinatorics',
 'Graph Theory',
 'Algebraic Geometry',
 'Differential Geometry',
 'General Topology',
 'Algebraic Topology',
 'Geometric Topology',
 'Probability Theory',
 'Statistics',
 'Computer Algebra',
 'Data Science',
 'Cryptography/Coding theory',
 'Mathematical Modeling',
 'Classical Mechanics',
 'Complex Systems/Dynamical Systems',
 'Quantum Theory',
 'Relativity and Quantum Relativistic Theories',
 'Statistical Mechanics']

In [21]:
mentee_high

['Name',
 'Gender',
 'University',
 'Department',
 'Class',
 'Reference',
 'Set Theory/Model Theory',
 'Category Theory',
 'Real Analysis/Functional Analysis',
 'Complex Analysis',
 'Harmonic Analysis',
 'Partial Differential Equations',
 'Numerical Analysis',
 'Group Theory',
 'Representation Theory',
 'Rings and Algebra',
 'Algebraic Number Theory',
 'Analytic Number Theory',
 'Combinatorics',
 'Graph Theory',
 'Algebraic Geometry',
 'Differential Geometry',
 'General Topology',
 'Algebraic Topology',
 'Geometric Topology',
 'Probability Theory',
 'Statistics',
 'Computer Algebra',
 'Data Science',
 'Cryptography/Coding theory',
 'Mathematical Modeling',
 'Classical Mechanics',
 'Complex Systems/Dynamical Systems',
 'Quantum Theory',
 'Relativity and Quantum Relativistic Theories',
 'Statistical Mechanics']