In [1]:
import pandas as pd
import model

In [2]:
# Load the three input tables (mentors, students, pairings) from the CSV files
# that sit next to this notebook, in that order.
data_mentor, data_student, data_pairing = (
    pd.read_csv(csv_path, encoding='utf-8')
    for csv_path in ('data-mentors.csv', 'data-students.csv', 'data-pairings.csv')
)

# Preview the student table
data_student.head()

Unnamed: 0,Name,Gender,University,Department,Class,Which of the following topic(s) would you be most interested in reading about? [Set Theory/Model Theory],Which of the following topic(s) would you be most interested in reading about? [Category Theory],Which of the following topic(s) would you be most interested in reading about? [Real Analysis/Functional Analysis],Which of the following topic(s) would you be most interested in reading about? [Complex Analysis],Which of the following topic(s) would you be most interested in reading about? [Harmonic Analysis],...,Which of the following topic(s) would you be most interested in reading about? [Statistics],Which of the following topic(s) would you be most interested in reading about? [Computer Algebra],Which of the following topic(s) would you be most interested in reading about? [Data Science],Which of the following topic(s) would you be most interested in reading about? [Cryptography/Coding theory],Which of the following topic(s) would you be most interested in reading about? [Mathematical Modeling],Which of the following topic(s) would you be most interested in reading about? [Classical Mechanics],Which of the following topic(s) would you be most interested in reading about? [Complex Systems/Dynamical Systems],Which of the following topic(s) would you be most interested in reading about? [Quantum Theory],Which of the following topic(s) would you be most interested in reading about? [Relativity and Quantum Relativistic Theories],Which of the following topic(s) would you be most interested in reading about? [Statistical Mechanics]
0,Student1,Male,Bilkent,Mathematics,1,Second Choice,,,,,...,,,Third Choice,First Choice,,,,,,
1,Student2,Male,ODTU,Mathematics,3,First Choice,,Second Choice,,,...,,,,,,,,,,
2,Student3,Male,ODTU,Mathematics,4,Second Choice,,,First Choice,,...,,,,,,,,,,
3,Student4,Male,Bogazici,Mathematics,1,,,,,,...,,,,,,,,,,
4,Student5,Male,ODTU,Mathematics,4,,,,,,...,,,,,,,,,,


In [3]:
# Remove the questions in the column names
#
# The topic columns (positions 5..34) are named "<question text> [<topic>]";
# only "<topic>" is kept. The topic is located through the brackets instead of
# fixed character offsets, so the cell is safe to re-run: a name that has already
# been shortened no longer has the "... [topic]" shape and is left untouched
# (the fixed slices would have cut it down to an empty string).
def _topic_label(column_name):
    """Return the text between the first '[' and the trailing ']' of a column name.

    Names that do not end with ']' or contain no '[' are returned unchanged.
    """
    start = column_name.find('[')
    if start == -1 or not column_name.endswith(']'):
        return column_name
    return column_name[start + 1:-1]


# Slicing the column index (rather than indexing it position by position) also
# avoids an IndexError when a table has fewer than 35 columns.
data_student = data_student.rename(
    columns={col: _topic_label(col) for col in data_student.columns[5:35]}
)
data_mentor = data_mentor.rename(
    columns={col: _topic_label(col) for col in data_mentor.columns[5:35]}
)
data_student.head()


Unnamed: 0,Name,Gender,University,Department,Class,Set Theory/Model Theory,Category Theory,Real Analysis/Functional Analysis,Complex Analysis,Harmonic Analysis,...,Statistics,Computer Algebra,Data Science,Cryptography/Coding theory,Mathematical Modeling,Classical Mechanics,Complex Systems/Dynamical Systems,Quantum Theory,Relativity and Quantum Relativistic Theories,Statistical Mechanics
0,Student1,Male,Bilkent,Mathematics,1,Second Choice,,,,,...,,,Third Choice,First Choice,,,,,,
1,Student2,Male,ODTU,Mathematics,3,First Choice,,Second Choice,,,...,,,,,,,,,,
2,Student3,Male,ODTU,Mathematics,4,Second Choice,,,First Choice,,...,,,,,,,,,,
3,Student4,Male,Bogazici,Mathematics,1,,,,,,...,,,,,,,,,,
4,Student5,Male,ODTU,Mathematics,4,,,,,,...,,,,,,,,,,


Required sets for the model

1) List of all universities
2) List of subjects
3) A dictionary for universities - key: university name, value: list of students from that university
4) Two dictionaries for interest areas (one for students, one for mentors) - key: student (or mentor) name, value: his/her ordered choices, stored as a mapping from choice rank to subject
5) A dictionary for the score of assigning mentee i to mentor j - key: pairs (mentee, mentor), value: an integer score of matching a mentor with a mentee

Note: If a mentee (or mentor) has only one interest, say only choice 1, then leave his/her choices 2 and 3 empty, and assign a score to him/her by using only choice 1. Since some mentors have a fourth choice, all mentees will get zero for their fourth choices.

In [4]:
# Preview the first five rows of the mentor table
data_mentor.head(n=5)

Unnamed: 0,Name,Gender,Affiliation,Position,Areas of interest:,Set Theory/Model Theory,Category Theory,Real Analysis/Functional Analysis,Complex Analysis,Harmonic Analysis,...,Statistics,Computer Algebra,Data Science,Cryptography/Coding theory,Mathematical Modeling,Classical Mechanics,Complex Systems/Dynamical Systems,Quantum Theory,Relativity and Quantum Relativistic Theories,Statistical Mechanics
0,Mentor1,Male,The University of Alabama,Postdoctoral researcher,PDEs,,,Second Choice,,,...,,,Fourth Choice,,,,,,,
1,Mentor2,Male,Middle East Technical University METU,Master,Algebraic and Differential Geometry,,,,,,...,,,,,,,,,,
2,Mentor3,Male,Illinois Institute of Technology,PhD student,"Extremal Graph Theory, Optimization",,,,,,...,,,,,First Choice,,,,,
3,Mentor4,Male,Higher School of Economics National Research U...,Master,"Plane Arrangements, Clifford Algebras, Algebra...",,,,Third Choice,,...,,,,,,,,,,
4,Mentor5,Male,İstanbul teknik üniversitesi,PhD student,Data science/coding theory,,,,,,...,,Second Choice,First Choice,,,,,,,


In [5]:
# The required set (1)
# Distinct universities, kept in order of first appearance in the student table
# (dict keys preserve insertion order and drop repeats).
student_uni = list(dict.fromkeys(data_student['University']))


# The required set (2)
# Subject names are the labels of the student-table columns at positions 5..34
subjects = list(data_student.columns[5:35])


# The required set (3)
# Start every known university with an empty roster ...
uni_students = {uni: [] for uni in student_uni}

# ... then file each student's name under his/her university, in table order
for university, student_name in zip(data_student['University'], data_student['Name']):
    uni_students[university].append(student_name)


# The required set (4)
def _collect_choices(frame, ranks):
    """Build the ranked-interest dictionary for one survey table.

    Parameters
    ----------
    frame : DataFrame
        Survey table with a 'Name' column and one column per entry of the
        notebook-level `subjects` list.
    ranks : list of str
        Cell values accepted as a rank (e.g. 'First Choice').

    Returns
    -------
    choices : dict
        {name: {rank: subject}}; a rank the person did not use keeps the
        placeholder 0.
    rejected : list of str
        Every cell entry that is not one of `ranks`.
    """
    choices = {}
    rejected = []

    for _, row in frame.iterrows():
        picks = dict.fromkeys(ranks, 0)  # 0 marks "no subject given for this rank"
        choices[row['Name']] = picks

        for subject in subjects:
            cell = row[subject]
            if pd.isnull(cell):
                continue

            # A cell may hold several comma-separated ranks. Strip the blanks
            # around each one so that "First Choice, Second Choice" is read as
            # two valid ranks instead of rejecting " Second Choice".
            for word in str(cell).split(','):
                word = word.strip()
                if word in picks:
                    # If the same rank is given to several subjects, the last
                    # subject in column order wins.
                    picks[word] = subject
                else:
                    rejected.append(word)
                    print(row)  # show the offending row for manual inspection

    return choices, rejected


choice_mentee, wrong_mentee = _collect_choices(
    data_student, ['First Choice', 'Second Choice', 'Third Choice']
)
choice_mentor, wrong_mentor = _collect_choices(
    data_mentor, ['First Choice', 'Second Choice', 'Third Choice', 'Fourth Choice']
)


In [None]:
# The required set (5)
#
# scores[(mentee, mentor)] is the sum, over every subject ranked by both the
# mentee and the mentor, of weights[mentee's rank] * weights[mentor's rank].
# Pairs with no subject in common keep the score 0.

weights = {'First Choice':8, 'Second Choice':6, 'Third Choice':4, 'Fourth Choice':2}

scores = {(mentee, mentor):0 for mentor in choice_mentor.keys() for mentee in choice_mentee.keys()}

for student, student_choices in choice_mentee.items():
    for mentor, mentor_choices in choice_mentor.items():
        for choice1, student_subject in student_choices.items():
            # A rank that was left empty is stored as the placeholder 0, not as
            # a subject name. It must be skipped: comparing it would make
            # 0 == 0 true and count two *unfilled* ranks as a shared interest.
            if not student_subject:
                continue
            for choice2, mentor_subject in mentor_choices.items():
                if student_subject == mentor_subject:
                    scores[(student, mentor)] += weights[choice1]*weights[choice2]

print(scores)


In [None]:
# Settings passed to model.matching. Their exact meaning is defined in the local
# `model` module, which is not visible from this notebook; the notes below are
# inferred from the names only.
# NOTE(review): presumably the maximum number of mentees per mentor - confirm
max_student = 2
# NOTE(review): presumably a cap on matched students per university - confirm
uni_capacity = 20
# NOTE(review): presumably a solver time limit (units not visible here) - confirm
time = 300

# Run the matching model on the dictionaries built above; the call result is
# unpacked into an objective value and a solution.
obj_value, solution = model.matching(choice_mentee, choice_mentor, uni_students, scores, max_student, uni_capacity, time)