<a href="https://colab.research.google.com/github/LaunchPad-Purdue/Mentor-Mentee-Pairing/blob/main/Pairing_Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the mentee and mentor rosters from CSV files in the working directory.
# Later cells read the "Name" column of each frame.
mentees = pd.read_csv("fakeMentees.csv")
mentors = pd.read_csv("fakeMentors.csv")

In [None]:
# Reduce each mentee's name to its first whitespace-separated token
# (i.e. the first name), then preview the result.
mentees["Name"] = mentees["Name"].str.split().str.get(0)
mentees.head()

Unnamed: 0,Name
0,Bradley
1,Trenton
2,Brianna
3,Cristobal
4,Billy


In [None]:
# Reduce each mentor's name to its first whitespace-separated token
# (i.e. the first name), then preview the result.
mentors["Name"] = mentors["Name"].str.split().str.get(0)
mentors.head()

Unnamed: 0,Name
0,Isaac
1,Aileen
2,Jan
3,Donnie
4,Sammy


## Random generation for testing 

In [None]:
import random

def generate_match(max=30):
    """Return a random integer index in the range ``0 .. max - 1``.

    ``max`` is the number of possible indices (default 30).
    """
    upper_bound = max - 1  # random.randint includes both endpoints
    return random.randint(0, upper_bound)

def generate_10_students(max=30):
    """Return a list of 10 distinct random indices drawn from ``0 .. max - 1``.

    ``max`` is the size of the pool to draw from (default 30).

    Raises:
        ValueError: if ``max`` is smaller than 10, because 10 distinct
            indices cannot be drawn from such a pool. (The previous
            retry-until-unique loop would spin forever in that case.)
    """
    picks_needed = 10
    if max < picks_needed:
        raise ValueError(
            "need a pool of at least %d to pick %d distinct students, got %d"
            % (picks_needed, picks_needed, max)
        )
    # random.sample draws without replacement, so no retry loop is needed.
    return random.sample(range(max), picks_needed)

In [None]:
# Sanity check of generate_match(2): tally how often 0 and 1 each come up
test = dict.fromkeys((0, 1), 0)
for _ in range(1, 10000):  # 9,999 draws in total
    outcome = generate_match(2)
    test[outcome] += 1
print(test)

# Roughly simulating three batches of students
generate_10_students()

{0: 5059, 1: 4940}


[21, 8, 23, 24, 28, 25, 17, 22, 26, 18]

In [None]:
# Randomizer for simulating responses

def _simulate_responses(respondents, candidates):
    """Build a fake form-response table.

    respondents -- Series of names filling in the form (one row each)
    candidates  -- Series of names on the other side that can be ranked

    Every respondent gets ten distinct, randomly chosen candidates in the
    "Rank 1 (Top Choice)", "Rank 2", ..., "Rank 10" columns.
    """
    rows = []
    for name in respondents:
        # Draw positions from the actual roster size instead of assuming 30.
        picks = generate_10_students(len(candidates))
        row = {"Timestamp": "10",
               "Email address": "%s@purdue.edu" % name.lower(),
               "What's your name?": name,
               "Rank 1 (Top Choice)": candidates.iloc[picks[0]]}
        for rank, pick in enumerate(picks[1:], start=2):
            row["Rank %d" % rank] = candidates.iloc[pick]
        rows.append(row)
    # DataFrame.append was removed in pandas 2.0, so collect the rows and
    # build the frame in one go.
    return pd.DataFrame(rows)


mentee_ranks = _simulate_responses(mentors["Name"], mentees["Name"])  # What mentors ranked mentees
mentor_ranks = _simulate_responses(mentees["Name"], mentors["Name"])  # What mentees ranked mentors

# This is all basically to get a replica of the google form responses that'll be loaded in

In [None]:
# Preview the first rows of the simulated responses in which mentors ranked mentees.
mentee_ranks.head()

Unnamed: 0,Email address,Rank 1 (Top Choice),Rank 10,Rank 2,Rank 3,Rank 4,Rank 5,Rank 6,Rank 7,Rank 8,Rank 9,Timestamp,What's your name?
0,isaac@purdue.edu,Althea,Rowena,Becky,Kris,Paula,John,Lawanda,Sean,Rhoda,Catherine,10,Isaac
1,aileen@purdue.edu,Dean,Cristobal,John,Rowena,Billy,Teddy,Catherine,Dianna,Kris,Lawanda,10,Aileen
2,jan@purdue.edu,Brianna,Simone,Sean,Rosalind,Everett,Cristobal,Teddy,Catherine,Hobert,Walker,10,Jan
3,donnie@purdue.edu,Erica,John,Deangelo,Becky,Kasey,Cristobal,Paula,Everett,Hubert,Billy,10,Donnie
4,sammy@purdue.edu,Rosalind,Catherine,Cristobal,Becky,Simone,Erica,Irene,Brianna,Dianna,Paula,10,Sammy


In [None]:
# Preview the first rows of the simulated responses in which mentees ranked mentors.
mentor_ranks.head()

Unnamed: 0,Email address,Rank 1 (Top Choice),Rank 10,Rank 2,Rank 3,Rank 4,Rank 5,Rank 6,Rank 7,Rank 8,Rank 9,Timestamp,What's your name?
0,bradley@purdue.edu,Quinn,Sammy,Donte,Abdul,Arturo,Latonya,Genaro,Darcy,Louis,Homer,10,Bradley
1,trenton@purdue.edu,Eleanor,Yvette,Teddy,Kathleen,Damian,Aileen,Carmela,Darcy,Quinn,Gilbert,10,Trenton
2,brianna@purdue.edu,Homer,Arturo,Carmela,Louis,Kathleen,Aileen,Hung,Donte,James,Erma,10,Brianna
3,cristobal@purdue.edu,James,Genaro,Gilbert,Erma,Hung,Eleanor,Abdul,Travis,Isaac,Damian,10,Cristobal
4,billy@purdue.edu,Abdul,Jan,Louis,Donnie,Hung,Yvette,James,Kaye,Travis,Donte,10,Billy


# Actual Algorithm
Based on: https://rosettacode.org/wiki/Stable_marriage_problem#Python


In [None]:
# Now here's where the actual algorithm starts.
# To run on real form responses instead of the simulated ones above,
# read the mentor and mentee response CSV files here:
## mentor_ranks = pd.read_csv("menteesresp.csv")
## mentee_ranks = pd.read_csv("mentorsresp.csv")

In [None]:
# Check the mentees' rankings of mentors that the algorithm below will consume.
mentor_ranks.head()

Unnamed: 0,Email address,Rank 1 (Top Choice),Rank 10,Rank 2,Rank 3,Rank 4,Rank 5,Rank 6,Rank 7,Rank 8,Rank 9,Timestamp,What's your name?
0,bradley@purdue.edu,Quinn,Sammy,Donte,Abdul,Arturo,Latonya,Genaro,Darcy,Louis,Homer,10,Bradley
1,trenton@purdue.edu,Eleanor,Yvette,Teddy,Kathleen,Damian,Aileen,Carmela,Darcy,Quinn,Gilbert,10,Trenton
2,brianna@purdue.edu,Homer,Arturo,Carmela,Louis,Kathleen,Aileen,Hung,Donte,James,Erma,10,Brianna
3,cristobal@purdue.edu,James,Genaro,Gilbert,Erma,Hung,Eleanor,Abdul,Travis,Isaac,Damian,10,Cristobal
4,billy@purdue.edu,Abdul,Jan,Louis,Donnie,Hung,Yvette,James,Kaye,Travis,Donte,10,Billy


## Preprocessing

Since the Stable Marriage algorithm needs every participant to rank all N members of the other group (2N preference lists of length N), we fill ranks 11-30 of each list in random order to get 60 lists of 30 names each.

Our assumption is that most matchings would happen between people who ranked each other in their top 10. We'll then figure out people who didn't get matches between 1-10 and manually review their profiles and match them.

In [None]:
# Roster names as flat arrays. Selecting the "Name" column (instead of
# reshape(30)) keeps this working for any roster size.
tors = mentors["Name"].to_numpy()
tees = mentees["Name"].to_numpy()


def _build_preferences(responses, respondents, candidates):
    """Turn form responses into complete preference lists.

    responses   -- DataFrame with a "What's your name?" column and the ten
                   rank columns ("Rank 1 (Top Choice)", "Rank 2" ... "Rank 10")
    respondents -- names on the responding side (keys of the result)
    candidates  -- names on the other side (the people being ranked)

    Returns a dict mapping each respondent to a list of candidate names: the
    ten explicit picks in order, followed by every remaining candidate in
    random order. Respondents without a response row keep an empty list. If
    the same name responds more than once, the last response wins (instead of
    two rankings being concatenated into one list).

    Raises KeyError if a response comes from a name that is not in
    ``respondents``.
    """
    rank_columns = ["Rank 1 (Top Choice)"] + ["Rank %d" % r for r in range(2, 11)]
    prefs = {name: [] for name in respondents}
    for _, row in responses.iterrows():
        name = row["What's your name?"]
        if name not in prefs:
            raise KeyError(name)
        ranked = [row[col] for col in rank_columns]  # explicit ranks 1-10
        # Everyone not explicitly ranked fills ranks 11 onwards in random
        # order, because Stable Marriage needs a complete ranking.
        unranked = np.setdiff1d(candidates, ranked)
        np.random.shuffle(unranked)
        prefs[name] = ranked + unranked.tolist()
    return prefs


# Building mentor ranks (from the form where mentors ranked mentees)
tor_p = _build_preferences(mentee_ranks, tors, tees)  # mentor preferences
print("Mentor Rankings: ", tor_p)

# Building mentee ranks (from the form where mentees ranked mentors)
tee_p = _build_preferences(mentor_ranks, tees, tors)  # mentee preferences
print("Mentee Rankings: ", tee_p)

Mentor Rankings:  {'Isaac': ['Althea', 'Becky', 'Kris', 'Paula', 'John', 'Lawanda', 'Sean', 'Rhoda', 'Catherine', 'Rowena', 'Stefan', 'Dianna', 'Bradley', 'Everett', 'Dean', 'Billy', 'Franklin', 'Hubert', 'Rosalind', 'Brianna', 'Hobert', 'Walker', 'Cristobal', 'Deangelo', 'Simone', 'Erica', 'Teddy', 'Trenton', 'Irene', 'Kasey'], 'Aileen': ['Dean', 'John', 'Rowena', 'Billy', 'Teddy', 'Catherine', 'Dianna', 'Kris', 'Lawanda', 'Cristobal', 'Brianna', 'Rosalind', 'Erica', 'Trenton', 'Stefan', 'Hubert', 'Sean', 'Becky', 'Paula', 'Irene', 'Deangelo', 'Walker', 'Franklin', 'Bradley', 'Hobert', 'Althea', 'Everett', 'Rhoda', 'Simone', 'Kasey'], 'Jan': ['Brianna', 'Sean', 'Rosalind', 'Everett', 'Cristobal', 'Teddy', 'Catherine', 'Hobert', 'Walker', 'Simone', 'Dean', 'Bradley', 'Billy', 'Dianna', 'John', 'Lawanda', 'Irene', 'Trenton', 'Hubert', 'Althea', 'Becky', 'Paula', 'Rowena', 'Deangelo', 'Erica', 'Rhoda', 'Kris', 'Franklin', 'Kasey', 'Stefan'], 'Donnie': ['Erica', 'Deangelo', 'Becky', 'Kase

## Stable Marriage

This explains it better than I could: https://en.wikipedia.org/wiki/Stable_marriage_problem

In [None]:
import copy

## Disclosure: This is copied from https://rosettacode.org/wiki/Stable_marriage_problem#Python

# Map the notebook's data onto the Rosetta Code naming:
# "guys" are the mentors (the proposing side), "gals" are the mentees.
guyprefers, galprefers = tor_p, tee_p
guys = sorted(tor_p)
gals = sorted(tee_p)

def check(engaged):
    """Return True if the pairing in ``engaged`` has no blocking pair.

    ``engaged`` maps each gal to her guy. For every couple, look for anyone
    either partner ranks above their current match and who, in turn, ranks
    that partner above their own current match. The first such pair found is
    printed and False is returned; otherwise True is returned.

    Reads the module-level ``galprefers`` and ``guyprefers`` rankings.
    """
    gal_of = {guy: gal for gal, guy in engaged.items()}
    for she, he in engaged.items():
        her_ranking = galprefers[she]
        guys_she_prefers = her_ranking[:her_ranking.index(he)]
        his_ranking = guyprefers[he]
        gals_he_prefers = his_ranking[:his_ranking.index(she)]

        # Would any guy she ranks higher rather be with her?
        for rival in guys_she_prefers:
            rivals_gal = gal_of[rival]
            rival_ranking = guyprefers[rival]
            current_pos = rival_ranking.index(rivals_gal)
            her_pos = rival_ranking.index(she)
            if current_pos > her_pos:
                print("%s and %s like each other better than "
                      "their present partners: %s and %s, respectively"
                      % (she, rival, he, rivals_gal))
                return False

        # Would any gal he ranks higher rather be with him?
        for other in gals_he_prefers:
            others_guy = engaged[other]
            other_ranking = galprefers[other]
            current_pos = other_ranking.index(others_guy)
            his_pos = other_ranking.index(he)
            if current_pos > his_pos:
                print("%s and %s like each other better than "
                      "their present partners: %s and %s, respectively"
                      % (he, other, she, others_guy))
                return False
    return True
 
def matchmaker():
    """Run the proposal loop and return the resulting ``{gal: guy}`` dict.

    Each free guy proposes to the next gal on his (copied) preference list.
    A free gal accepts; an engaged gal switches only if she ranks the new guy
    above her current one. A rejected or dumped guy is re-queued while he
    still has gals left to try. Progress is printed as it happens.

    Reads the module-level ``guys``, ``guyprefers`` and ``galprefers``; the
    preference tables are deep-copied so the originals are not modified.
    """
    free_guys = guys[:]
    engaged = {}
    choices_left = copy.deepcopy(guyprefers)
    gal_rankings = copy.deepcopy(galprefers)
    while free_guys:
        suitor = free_guys.pop(0)
        his_choices = choices_left[suitor]
        gal = his_choices.pop(0)
        current = engaged.get(gal)

        if not current:
            # She's free
            engaged[gal] = suitor
            print("  %s and %s" % (suitor, gal))
            continue

        # She is already engaged: compare the two guys on her ranking
        her_ranking = gal_rankings[gal]
        if her_ranking.index(current) > her_ranking.index(suitor):
            # She prefers the new guy
            engaged[gal] = suitor
            print("  %s dumped %s for %s" % (gal, current, suitor))
            if choices_left[current]:
                # Her ex still has gals to try
                free_guys.append(current)
        elif his_choices:
            # She stays with her current guy; the suitor tries again later
            free_guys.append(suitor)
    return engaged
 
 
# Run the matching; matchmaker() prints each proposal as it happens.
print('\nPairings:')
engaged = matchmaker()

# Final pairs, sorted by the keys of `engaged` (the mentees).
print('\nMentor-Mentee Pairs:')
print('  ' + ',\n  '.join(['%s is paired to %s' % couple
                           for couple in sorted(engaged.items())]))
print()

# check() prints the offending pair itself before we report the failure.
if check(engaged):
    print('Pairing stability check PASSED')
else:
    print('Pairing stability check FAILED')


Pairings:
  Abdul and Hubert
  Aileen and Dean
  Arturo and Rosalind
  Berta and John
  Brandy and Hobert
  Darcy and Billy
  Donnie and Erica
  Donte and Rowena
  John dumped Berta for Eleanor
  Erma and Teddy
  Gilbert and Catherine
  Hiram and Becky
  Homer and Kasey
  Isaac and Althea
  Rowena dumped Donte for James
  Jan and Brianna
  Kathleen and Trenton
  Kaye and Kris
  Kent and Paula
  Paula dumped Kent for Latonya
  Louis and Franklin
  Quinn and Lawanda
  Rosalind dumped Arturo for Sammy
  Teddy and Everett
  Travis and Stefan
  Franklin dumped Louis for Yvette
  Becky dumped Hiram for Carmela
  Genaro and Deangelo
  Arturo and Cristobal
  Becky dumped Carmela for Louis
  Hung and Irene
  Donte and Bradley
  Rowena dumped James for Kent
  Carmela and Sean
  Teddy dumped Erma for Damian
  Stefan dumped Travis for Hiram
  Erma and Rhoda
  Hobert dumped Brandy for Berta
  Brianna dumped Jan for James
  Althea dumped Isaac for Brandy
  Sean dumped Carmela for Jan
  John dumped 

In [None]:
import json
# Pretty-print the final pairing as JSON, sorted by key.
# `engaged` maps each mentee (key) to the mentor (value) they were paired with.
print(json.dumps(engaged,sort_keys=True, indent=4))

{
    "Althea": "Brandy",
    "Becky": "Teddy",
    "Billy": "Abdul",
    "Bradley": "Donte",
    "Brianna": "James",
    "Catherine": "Louis",
    "Cristobal": "Damian",
    "Dean": "Aileen",
    "Deangelo": "Eleanor",
    "Dianna": "Genaro",
    "Erica": "Arturo",
    "Everett": "Donnie",
    "Franklin": "Yvette",
    "Hobert": "Berta",
    "Hubert": "Carmela",
    "Irene": "Erma",
    "John": "Travis",
    "Kasey": "Homer",
    "Kris": "Kaye",
    "Lawanda": "Quinn",
    "Paula": "Latonya",
    "Rhoda": "Isaac",
    "Rosalind": "Sammy",
    "Rowena": "Kent",
    "Sean": "Gilbert",
    "Simone": "Hung",
    "Stefan": "Hiram",
    "Teddy": "Jan",
    "Trenton": "Kathleen",
    "Walker": "Darcy"
}


In [None]:
# Figuring out true matches (Where both mentors and mentees actually ranked the people)
true_matches = {}
true_rows = []
for mentee, mentor in engaged.items():
    mentee_rank = tee_p[mentee].index(mentor)  # 0-based position on the mentee's list
    mentor_rank = tor_p[mentor].index(mentee)  # 0-based position on the mentor's list
    # Positions 0-9 are the explicit form rankings; anything from 10 on was
    # filled in randomly, so both sides must be below 10 for a true match.
    if mentor_rank < 10 and mentee_rank < 10:
        true_matches[mentee + ", " + mentor] = str(mentee_rank) + ", " + str(mentor_rank)
        true_rows.append({
            "Mentor Name": mentor,
            "Mentee Name": mentee,
            "Mentee's Rating": mentee_rank + 1,  # 1-based for the report
            "Mentor's Rating": mentor_rank + 1,
        })
# DataFrame.append was removed in pandas 2.0; build the frame from the rows at once.
results = pd.DataFrame(true_rows)
print(json.dumps(true_matches, sort_keys=True, indent=4))

{
    "Althea, Brandy": "0, 2",
    "Becky, Teddy": "5, 4",
    "Billy, Abdul": "0, 2",
    "Bradley, Donte": "1, 2",
    "Brianna, James": "7, 5",
    "Catherine, Louis": "1, 2",
    "Cristobal, Damian": "8, 5",
    "Dean, Aileen": "6, 0",
    "Erica, Arturo": "8, 6",
    "Everett, Donnie": "4, 6",
    "Franklin, Yvette": "5, 0",
    "Hobert, Berta": "0, 6",
    "Hubert, Carmela": "5, 9",
    "John, Travis": "0, 5",
    "Kasey, Homer": "0, 0",
    "Kris, Kaye": "0, 0",
    "Lawanda, Quinn": "0, 0",
    "Paula, Latonya": "1, 0",
    "Rosalind, Sammy": "2, 0",
    "Sean, Gilbert": "9, 1",
    "Simone, Hung": "7, 3",
    "Stefan, Hiram": "5, 3",
    "Teddy, Jan": "0, 5",
    "Trenton, Kathleen": "2, 0"
}


In [None]:
# Figuring out matches made by randomization:

random_found = {}
random_rows = []
for mentee, mentor in engaged.items():
    mentee_rank = tee_p[mentee].index(mentor)  # 0-based position on the mentee's list
    mentor_rank = tor_p[mentor].index(mentee)  # 0-based position on the mentor's list
    if mentor_rank >= 10 or mentee_rank >= 10:  # Either one has been selected using random picks
        random_found[mentee + ", " + mentor] = str(mentee_rank) + ", " + str(mentor_rank)
        random_rows.append({
            "Mentor Name": mentor,
            "Mentee Name": mentee,
            "Mentee's Rating": mentee_rank + 1,  # 1-based for the report
            "Mentor's Rating": mentor_rank + 1,
        })
# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
if random_rows:
    results = pd.concat([results, pd.DataFrame(random_rows)], ignore_index=True)
print(json.dumps(random_found, sort_keys=True, indent=4))

{
    "Deangelo, Eleanor": "19, 2",
    "Dianna, Genaro": "13, 3",
    "Irene, Erma": "12, 10",
    "Rhoda, Isaac": "20, 7",
    "Rowena, Kent": "10, 2",
    "Walker, Darcy": "11, 11"
}


With the randomized dataset, we estimated that around 10 of the 30 pairs wouldn't get matches. But we didn't consider that if a mentee ranks a mentor high, the mentor is likely to rank the mentee high, and vice versa. Both of these considerations would make it more likely that people rank each other in their top 10.

We didn't really get into conditional probabilities in this simulation, but if we did consider those we'd have even fewer pairs that needed to be matched manually.

In [None]:
# Put the report columns in a fixed order and store both ratings as the
# smallest integer dtype that fits.
column_order = ["Mentee Name", "Mentor Name", "Mentee's Rating", "Mentor's Rating"]
results = results.reindex(columns=column_order)
for rating_column in ("Mentee's Rating", "Mentor's Rating"):
    results[rating_column] = pd.to_numeric(results[rating_column], downcast="integer")
results.head()

Unnamed: 0,Mentee Name,Mentor Name,Mentee's Rating,Mentor's Rating
0,Hubert,Carmela,6,10
1,Dean,Aileen,7,1
2,Rosalind,Sammy,3,1
3,John,Travis,1,6
4,Hobert,Berta,1,7


In [None]:
# Write the final pairs to pairs.csv in the working directory, without the row index.
results.to_csv("pairs.csv", index=False)