# Stable Matching Pair Research
Implementation of [Stable Roomates](http://www.dcs.gla.ac.uk/~pat/jchoco/roommates/papers/Comp_sdarticle.pdf) for [Pair Research](http://pairresearch.io/). 

## Implementation of Stable Roommates Matching 

In [1]:
"""
Stable Roommate Matching

Implementation of Robert Irving's Stable Roommates Algorithm.
http://www.dcs.gla.ac.uk/~pat/jchoco/roommates/papers/Comp_sdarticle.pdf
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import random
import unittest
from copy import deepcopy


def stable_roommates(preferences, debug=False):
    """
    Runs complete algorithm and returns a stable matching, if exists.

    Input:
        preferences (matrix, list of lists of numbers): n-by-m preference matrix containing preferences for each person.
            m = n - 1, so each person has rated all other people.
            Each row is a 1-indexed ordered ranking of others in the pool.
            Therefore max(preferences[person]) <= number people and min(preferences[person]) = 1.
        debug (boolean): including print statements

    Return:
        (list): stable matching, if exists. Otherwise, None.
            If a matching exists, -1 for a person indicates no partner.
            ex: [2, 1, -1] (Person 0 matched with 2, 1 matched with 0, 2 not matched)
    """
    # validate input
    is_valid, person_added, valid_preferences = validate_input(preferences, debug)

    if not is_valid:
        if debug:
            print('Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.')
        return None

    # create a preference lookup table
    # person_number : [list of preferences]
    preferences_dict = {str(x + 1): [str(y) for y in valid_preferences[x]] for x in range(len(valid_preferences))}

    # create a dict of dicts holding index of each person ranked
    # person number : {person : rank_index }
    ranks = {index: dict(zip(value, range(len(value)))) for (index, value) in preferences_dict.items()}

    # phase 1: initial proposal
    p1_holds = phase_1(preferences_dict, ranks)

    # if anyone does not have a hold, stable matching is not possible
    for hold in p1_holds:
        if p1_holds[hold] is None:
            if debug:
                print('Stable matching is not possible. Failed at Phase 1: not everyone was proposed to.')
            return None

    # phase 1: reduction
    p1_reduced_preferences = phase_1_reduce(preferences_dict, ranks, p1_holds)

    # phase 1: stable match halting condition
    # if p1_reduced_preferences has only one preference per person, matching should be stable (lemma 2)
    p1_halt = True
    for person in p1_reduced_preferences:
        if len(p1_reduced_preferences[person]) > 1:
            p1_halt = False
            break

    if p1_halt:
        # verification before returning
        if verify_matching(p1_holds):
            if verify_stability(p1_holds, ranks):
                if debug:
                    print('Stable matching found. Returning person : partner dictionary.')

                # check if person was added. if so, delete added person (n + 1) and set their match to -1
                if person_added:
                    person_str = str(len(p1_holds))
                    person_added_match = p1_holds[person_str]

                    del p1_holds[person_str]
                    p1_holds[person_added_match] = '-1'

                if debug:
                    print(p1_holds)

                return format_output(p1_holds)
            else:
                if debug:
                    print('Stable matching is not possible. Failed at Verification: matching computed, but not stable.')
                return None
        else:
            if debug:
                print('Stable matching is not possible. Failed at Verification: matching computed, but not valid.')
            return None

    # phase 2: find an all-or-nothing cycle
    cycle = find_all_or_nothing_cycle(p1_reduced_preferences, ranks, p1_holds)

    # if cycle with more than size 3 does not exist, no stable matching exists
    if cycle is None:
        if debug:
            print('Stable matching is not possible. Failed at Phase 2: could not find an all-or-nothing cycle.')
        return None
    elif cycle is not None and len(cycle) == 3:
        if debug:
            print('Stable matching is not possible. Failed at Phase 2: could not find an all-or-nothing cycle len > 3.')
        return None

    # phase 2: reduction
    final_holds = phase_2_reduce(p1_reduced_preferences, ranks, cycle)

    # check if holds are not empty
    if final_holds is not None:
        # verification
        if verify_matching(final_holds):
            if verify_stability(final_holds, ranks):
                if debug:
                    print('Stable matching found. Returning person : partner dictionary.')

                # check if person was added. if so, delete added person (n + 1) and set their match to -1
                if person_added:
                    person_str = str(len(final_holds))
                    person_added_match = final_holds[person_str]

                    del final_holds[person_str]
                    final_holds[person_added_match] = '-1'

                if debug:
                    print(final_holds)

                return format_output(final_holds)
            else:
                if debug:
                    print('Stable matching is not possible. Failed at Verification: matching computed, but not stable.')
                return None
        else:
            if debug:
                print('Stable matching is not possible. Failed at Verification: matching computed, but not valid.')
            return None
    else:
        if debug:
            print('Stable matching is not possible. Failed at Phase 2 reduction.')
        return None


def phase_1(preferences, ranks, curr_holds=None):
    """
    Performs first phase of matching by doing round robin proposals until stopping condition (i) or (ii) is met:
         (i) each person is holding a proposal
        (ii) one person is rejected by everyone

    Input:
        preferences (dict of list of strings): dict of ordered preference lists {person : [ordered list]}
        ranks (dict of dict of ranking index): dict of persons with dicts indicating rank of each other person
        curr_holds (dict, optional): dict of persons with current holds

    Return:
        (dict): holds after condition (i) or (ii) is met.
    """
    people = list(preferences.keys())

    # placeholder for holds
    holds = {person: None for person in people}

    # during phase 1, no holds exist. initialize curr_holds to 0 (all people are > 0)
    if curr_holds is None:
        curr_holds = {person: 0 for person in people}

    # randomize ordering of proposal
    random.shuffle(people)

    # track people who are already proposed to
    proposed_set = set()

    # begin proposing
    for person in people:
        proposer = person

        # proposal step
        while True:
            # find proposer someone to propose to, in order of proposer's preference list
            while curr_holds[proposer] < len(preferences[proposer]):
                # find proposee given proposer's preferences
                proposee = preferences[proposer][curr_holds[proposer]]
                curr_holds[proposer] += 1

                # find who proposee is holding, if any
                proposee_hold = holds[proposee]

                # stop searching if proposee doesn't hold anyone or ranks proposer higher than curr hold
                if proposee_hold is None or ranks[proposee][proposer] < ranks[proposee][proposee_hold]:
                    # proposee holds proposer's choice
                    holds[proposee] = proposer
                    break

            # check if proposee has already been proposed to
            if proposee not in proposed_set:
                # successful proposal
                proposed_set.add(proposee)
                break

            # if all preferences are exhausted and proposee does not have anyone, stop
            if curr_holds[proposer] >= len(preferences[proposer]):
                break

            # if proposee is proposed to, reject proposee_hold and continue proposal with them
            proposer = proposee_hold

    # final holds from phase 1
    return holds


def phase_1_reduce(preferences, ranks, holds):
    """
    Performs a reduction on preferences based on phase 1 proposals, and the following:
        Preference list for y who holds proposal x can be reduced by deleting
             (i) all those to whom y prefers x
            (ii) all those who hold a proposal from a person they prefer to y

    Input:
        preferences (dict of list of strings): dict of ordered preference lists {person : [ordered list]}
        ranks (dict of dict of ranking index): dict of persons with dicts indicating rank of each other person
        holds (dict): dict of persons with current holds

    Return:
        (dict of list of strings): reduced preference list such that:
            (iii) y is the first on x's list and last on y's
             (iv) b appears on a's list iff a appears on b's
    """
    # create output preferences
    reduced_preferences = deepcopy(preferences)

    # loop though each hold
    for proposee in holds:
        proposer = holds[proposee]

        # loop though all of person's preferences
        i = 0
        while i < len(reduced_preferences[proposee]):
            # fetch proposee's preferences
            curr_proposee_preference = reduced_preferences[proposee][i]

            # proposee should only hold preferences equal and higher to proposer (i)
            if curr_proposee_preference == proposer:
                reduced_preferences[proposee] = reduced_preferences[proposee][:(i + 1)]
            # delete all people who hold a proposal from someone they prefer to the proposee (ii)
            elif ranks[curr_proposee_preference][holds[curr_proposee_preference]] < \
                    ranks[curr_proposee_preference][proposee]:
                reduced_preferences[proposee].pop(i)
                continue

            # continue to preference list
            i += 1

    return reduced_preferences


def find_all_or_nothing_cycle(preferences, ranks, holds):
    """
    Finds an all-or-nothing cycle in reduced preferences, if exists.

    Input:
        preferences (dict of list of strings): dict of ordered preference lists {person : [ordered list]}
        ranks (dict of dict of ranking index): dict of persons with dicts indicating rank of each other person
        holds (dict): dict of persons with current holds

    Return:
        (list): cycle of persons
    """
    # start with two individuals, p and q
    p = []
    q = []

    # find a person with > 1 preference left
    curr = None
    for person in preferences:
        if len(preferences[person]) > 1:
            curr = person
            break

    # if no person can be found, no cycle exists
    if curr is None:
        return None

    # create cycle
    while curr not in p:
        # q_i = second person in p_i's list
        q += [preferences[curr][1]]

        # p_{i + 1} = q_i's last person
        p += [curr]
        curr = preferences[q[-1]][-1]

    cycle = p[p.index(curr):]

    return cycle


def phase_2_reduce(preferences, ranks, cycle):
    """
    Performs a reduction on found all-or-nothing cycles.

    Input:
        preferences (dict of list of strings): dict of ordered preference lists {person : [ordered list]}
        ranks (dict of dict of ranking index): dict of persons with dicts indicating rank of each other person
        cycle (list): all-or-nothing cycle

    Return:
        (dict): holds after sequential reductions, or None if no matching can be found.
    """
    # continue while a cycle exists
    curr_cycle = deepcopy(cycle)
    curr_holds = None
    p2_preferences = deepcopy(preferences)

    while curr_cycle is not None:
        curr_preferences = {}

        for person in preferences:
            if person in curr_cycle:
                curr_preferences[person] = 1
            else:
                curr_preferences[person] = 0

        curr_holds = phase_1(p2_preferences, ranks, curr_preferences)
        p2_preferences = phase_1_reduce(p2_preferences, ranks, curr_holds)

        curr_cycle = find_all_or_nothing_cycle(p2_preferences, ranks, curr_holds)

    return curr_holds


def validate_input(preference_matrix, debug=False):
    """
    Makes sure a preference matrix is n-by-m and m = n - 1.
        If each isn't full, fill the list with the remaining people.
        If n is odd, add in a n + 1 person to allow matching to run.


    Input:
        preferences (matrix, list of lists of numbers): n-by-m preference matrix containing preferences for each person.
            m = n - 1, so each person has rated all other people.
            Each row is a 1-indexed ordered ranking of others in the pool.
            Therefore max(preferences[person]) <= number people and min(preferences[person]) = 1.

    Return:
        (boolean): if matrix is valid
        (boolean): if n + 1 person was added
        (matrix, list of lists numbers): filled and validated preference list
    """
    is_valid = False
    person_added = False
    output_matrix = deepcopy(preference_matrix)

    n = len(output_matrix)
    m = n - 1

    matrix_iterator = range(n)

    # validate list of lists of numbers
    if type(output_matrix) is not list:
        if debug:
            print('Input validation failed: preference_matrix is not a list.')
        return False, person_added, None

    # validate size
    if n <= 1:  # empty matrix or only 1 person (no point in matching)
        if debug:
            print('Input validation failed: preference_matrix must have size > 1')
        return False, person_added, None

    # validate content of matrix
    for i in matrix_iterator:
        sublist = output_matrix[i]

        #  each sublist is a list
        if type(sublist) is not list:
            if debug:
                print('Input validation failed: each list in preference_list should be a list.')
            return False, person_added, None

        # each preference list can only be of length m
        if len(sublist) > m:
            if debug:
                print('Input validation failed: each list in preference_list cannot have length greater than m.')
            return False, person_added, None

        # each value is an int
        for j in sublist:
            if type(j) is not int:
                if debug:
                    print('Input validation failed: all values should be integers')
                return False, person_added, None

            # number should be between 1 and n and should be the person index
            if j < 1 or j > n or j == (i + 1):
                if debug:
                    print('Input validation failed: each value in each row should be between \
                          1 and n (number of people) and cannot be the person themselves')
                return False, person_added, None

    if debug:
        print('Input validation passed.')
    is_valid = True

    # add n + 1 person if n is odd
    if n % 2 != 0:
        person_added = True
        output_matrix += [range(1, n + 1)]

    # fill any rows that are not of length m
    full_set = set(range(1, n + 1))
    for i in matrix_iterator:
        if len(output_matrix[i]) != m:
            to_add = full_set - set(output_matrix[i]) - {i + 1}
            output_matrix[i] += list(to_add)

        if person_added:
            output_matrix[i] += [n + 1]

    # returns is_valid (list of list of numbers), person_added (if n is odd), output_matrix (filled preference_matrix)
    return is_valid, person_added, output_matrix


def verify_matching(matching):
    """
    Checks if a matching is valid.
        Valid matchings have all people matched to one and only one person.

    Input:
        matching (dict): dict containing person:matching pairs

    Return:
        (boolean)): matching is valid
    """
    # validate matching
    person_set = {person for person in matching.keys()}
    matching_set = {match for match in matching.values()}

    # equal cardinality and content
    if person_set != matching_set:
        return False

    # check for a:b, then b:a
    for person in matching:
        if person != matching[matching[person]]:
            return False

    # matching is valid
    return True


def verify_stability(matching, ranks):
    """
    Checks if a valid matching (all people matched to one and only one person) is stable.
        Stable iff no two unmatched members both prefer each other to their current partners in the matching.

    Input:
        matching (dict): dict containing person:matching pairs
        ranks (dict of dict of ranking index): dict of persons with dicts indicating rank of each other person

    Output:
        (boolean): matching is stable
    """
    for x in matching:
        for y in matching:
            # ignore if x, y are the same or x, y are matched
            if x == y or y == matching[x]:
                continue

            # get partner under matching for x, y and corresponding ranks of matched partners
            x_partner = matching[x]
            y_partner = matching[y]

            x_partner_rank = ranks[x][x_partner]
            y_partner_rank = ranks[y][y_partner]

            # get ranking of x -> y, y -> x
            x_y_rank = ranks[x][y]
            y_x_rank = ranks[y][x]

            # if x prefers y to current partner AND y prefers x to current partner, unstable
            # prefer = lower ranking index since ranking is highest -> lowest preference
            if x_y_rank < x_partner_rank and y_x_rank < y_partner_rank:
                return False

    return True


def format_output(matching):
    """
    Formats holds into output that matches maximum weighted matching output.
        ex: [2, 1, -1] (Person 0 matched with 2, 1 matched with 0, 2 not matched)

    Input:
        matching (dict): dict of persons who they are matched to (-1 if unmatched)

    Return:
        (list): stable matching, if exists. Otherwise, None.
            If a matching exists, -1 for a person indicates no partner.
    """
    n = len(matching)
    output = [0 for i in range(n)]

    # convert dict to output list
    for (key, value) in matching.items():
        int_key = int(key) - 1
        int_value = int(value)

        output[int_key] = int_value

    return output

## Test Cases
A variety of test cases from (1) Irving's paper, (2) Wikipedia, (3) external implementations, and (4) any other custom cases.

### Test Case from Irving's Paper

In [2]:
paper_matching_6 = [
    [4, 6, 2, 5, 3],
    [6, 3, 5, 1, 4],
    [4, 5, 1, 6, 2],
    [2, 6, 5, 1, 3],
    [4, 2, 3, 6, 1],
    [5, 1, 4, 2, 3]
]

paper_matching_8 = [
    [2, 5, 4, 6, 7, 8, 3],
    [3, 6, 1, 7, 8, 5, 4],
    [4, 7, 2, 8, 5, 6, 1],
    [1, 8, 3, 5, 6, 7, 2],
    [6, 1, 8, 2, 3, 4, 7],
    [7, 2, 5, 3, 4, 1, 8],
    [8, 3, 6, 4, 1, 2, 5],
    [5, 4, 7, 1, 2, 3, 6]
]

paper_no_matching_4 = [
    [2, 3, 4],
    [3, 1, 4],
    [1, 2, 4],
    [1, 2, 3]
]

paper_no_matching_6 = [
    [2, 6, 4, 3, 5],
    [3, 5, 1, 6, 4],
    [1, 6, 2, 5, 4],
    [5, 2, 3, 6, 1],
    [6, 1, 3, 4, 2],
    [4, 2, 5, 1, 3]
]

In [3]:
stable_roommates(paper_matching_6, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '6', '3': '2', '2': '3', '5': '4', '4': '5', '6': '1'}


[6, 3, 2, 5, 4, 1]

In [4]:
stable_roommates(paper_matching_8, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '4', '3': '2', '2': '3', '5': '6', '4': '1', '7': '8', '6': '5', '8': '7'}


[4, 3, 2, 1, 6, 5, 8, 7]

In [5]:
stable_roommates(paper_no_matching_4, debug=True)

Input validation passed.
Stable matching is not possible. Failed at Phase 1: not everyone was proposed to.


In [6]:
stable_roommates(paper_no_matching_6, debug=True)

Input validation passed.
Stable matching is not possible. Failed at Phase 2: could not find an all-or-nothing cycle len > 3.


### Test Cases from Wikipedia Article (https://en.wikipedia.org/wiki/Stable_roommates_problem#Algorithm)

In [7]:
wiki_matching_6 = [
    [3, 4, 2, 6, 5],
    [6, 5, 4, 1, 3],
    [2, 4, 5, 1, 6],
    [5, 2, 3, 6, 1],
    [3, 1, 2, 4, 6],
    [5, 1, 3, 4, 2]
]

In [8]:
stable_roommates(wiki_matching_6, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '6', '3': '5', '2': '4', '5': '3', '4': '2', '6': '1'}


[6, 4, 5, 2, 3, 1]

### Test Cases from External Implementation (http://www.dcs.gla.ac.uk/~pat/roommates/distribution/data/) 

In [9]:
external_matching_8 = [
    [2, 5, 4, 6, 7, 8, 3],
    [3, 6, 1, 7, 8, 5, 4],
    [4, 7, 2, 8, 5, 6, 1],
    [1, 8, 3, 5, 6, 7, 2],
    [6, 1, 8, 2, 3, 4, 7],
    [7, 2, 5, 3, 4, 1, 8],
    [8, 3, 6, 4, 1, 2, 5],
    [5, 4, 7, 1, 2, 3, 6]
]

external_matching_10 = [
    [8, 2, 9, 3, 6, 4, 5, 7, 10],
    [4, 3, 8, 9, 5, 1, 10, 6, 7],
    [5, 6, 8, 2, 1, 7, 10, 4, 9],
    [10, 7, 9, 3, 1, 6, 2, 5, 8],
    [7, 4, 10, 8, 2, 6, 3, 1, 9],
    [2, 8, 7, 3, 4, 10, 1, 5, 9],
    [2, 1, 8, 3, 5, 10, 4, 6, 9],
    [10, 4, 2, 5, 6, 7, 1, 3, 9],
    [6, 7, 2, 5, 10, 3, 4, 8, 1],
    [3, 1, 6, 5, 2, 9, 8, 4, 7]
]

external_matching_20 = [
    [13, 12, 20, 17, 11, 6, 8, 2, 3, 14, 4, 16, 5, 10, 18, 19, 9, 15, 7],
    [13, 6, 8, 17, 18, 19, 1, 11, 7, 4, 15, 16, 5, 9, 3, 20, 12, 10, 14],
    [6, 16, 4, 9, 14, 13, 17, 19, 8, 2, 1, 12, 20, 5, 18, 15, 7, 11, 10],
    [11, 7, 8, 2, 17, 3, 15, 6, 19, 10, 9, 5, 1, 16, 13, 20, 18, 14, 12],
    [8, 17, 14, 16, 4, 13, 15, 6, 19, 9, 12, 7, 2, 3, 11, 18, 20, 10, 1],
    [8, 13, 10, 14, 18, 15, 2, 7, 4, 16, 19, 5, 9, 17, 20, 3, 11, 12, 1],
    [13, 1, 4, 9, 19, 18, 11, 14, 10, 2, 17, 6, 15, 16, 5, 3, 12, 8, 20],
    [1, 6, 20, 7, 5, 15, 19, 4, 12, 3, 17, 9, 10, 14, 16, 2, 18, 11, 13],
    [17, 13, 3, 5, 7, 4, 12, 2, 18, 20, 15, 8, 10, 1, 6, 11, 19, 14, 16],
    [9, 4, 16, 14, 18, 17, 15, 11, 20, 13, 3, 12, 2, 1, 19, 7, 5, 8, 6],
    [6, 15, 4, 1, 18, 14, 5, 3, 9, 2, 17, 13, 8, 7, 12, 20, 19, 10, 16],
    [5, 18, 7, 16, 6, 20, 19, 14, 9, 17, 3, 1, 8, 10, 11, 13, 2, 15, 4],
    [3, 10, 7, 18, 14, 15, 1, 6, 12, 4, 8, 19, 16, 17, 5, 20, 9, 11, 2],
    [2, 5, 10, 13, 19, 17, 6, 3, 18, 7, 20, 9, 1, 4, 16, 12, 15, 8, 11],
    [12, 13, 5, 11, 2, 16, 18, 14, 1, 6, 17, 8, 19, 4, 10, 7, 20, 3, 9],
    [1, 7, 6, 5, 14, 18, 12, 17, 20, 11, 15, 10, 2, 13, 3, 8, 19, 9, 4],
    [5, 8, 15, 9, 7, 18, 11, 10, 19, 2, 1, 12, 3, 14, 20, 13, 6, 16, 4],
    [14, 3, 8, 10, 13, 5, 9, 15, 12, 1, 17, 6, 16, 11, 2, 7, 4, 19, 20],
    [9, 15, 20, 12, 18, 1, 11, 5, 3, 2, 13, 14, 10, 7, 6, 16, 8, 17, 4],
    [5, 6, 18, 19, 16, 7, 4, 9, 2, 17, 8, 15, 1, 12, 13, 10, 14, 3, 11]
]

# matching exists if algorithm leaves 7 unmatched
external_matching_7 = [
    [3, 4, 2, 6, 5, 7], 
    [6, 5, 4, 1, 3, 7], 
    [2, 4, 5, 1, 6, 7], 
    [5, 2, 3, 6, 1, 7],
    [3, 1, 2, 4, 6, 7],
    [5, 1, 3, 4, 2, 7],
    [1, 2, 3, 4, 5, 6]
]

In [10]:
stable_roommates(external_matching_8, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '4', '3': '2', '2': '3', '5': '6', '4': '1', '7': '8', '6': '5', '8': '7'}


[4, 3, 2, 1, 6, 5, 8, 7]

In [11]:
stable_roommates(external_matching_10, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'10': '8', '1': '4', '3': '6', '2': '9', '5': '7', '4': '1', '7': '5', '6': '3', '9': '2', '8': '10'}


[4, 9, 6, 1, 7, 3, 5, 10, 2, 8]

In [12]:
stable_roommates(external_matching_20, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'11': '15', '10': '14', '13': '7', '12': '16', '15': '11', '14': '10', '17': '5', '16': '12', '19': '20', '18': '9', '1': '8', '3': '4', '2': '6', '5': '17', '4': '3', '7': '13', '6': '2', '9': '18', '20': '19', '8': '1'}


[8, 6, 4, 3, 17, 2, 13, 1, 18, 14, 15, 16, 7, 10, 11, 12, 5, 9, 20, 19]

In [13]:
stable_roommates(external_matching_7, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '6', '3': '5', '2': '4', '5': '3', '4': '2', '7': '-1', '6': '1'}


[6, 4, 5, 2, 3, 1, -1]

### Custom Test Cases

In [14]:
# empty matrix
custom_no_matching_empty = []

# one person (no matching should be possible)
custom_no_matching_1 = [[]]

# two people
custom_matching_2 = [[2], [1]]

# three people (odd: should add person and find a matching)
custom_matching_3 = [
    [3, 2],
    [3, 1],
    [1, 2]
]

In [15]:
stable_roommates(custom_no_matching_empty, debug=True)

Input validation failed: preference_matrix must have size > 1
Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.


In [16]:
stable_roommates(custom_no_matching_1, debug=True)

Input validation failed: preference_matrix must have size > 1
Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.


In [17]:
stable_roommates(custom_matching_2, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '2', '2': '1'}


[2, 1]

In [18]:
stable_roommates(custom_matching_3, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '3', '3': '1', '2': '-1'}


[3, -1, 1]

## Analysis of Stable Roommates Matching on Pair Research Data
Below, we analyze the impact of using the Stable Roommates algorithm on previous pairings. 

We begin by seeing
1. How frequently can we find stable matchings? 
2. When stable matchings are not possible, for what reason do they fail?

In [19]:
import pandas as pd
from pymongo import MongoClient

import seaborn as sns
%matplotlib inline

In [20]:
uri = 'mongodb://user:pwd@ds011419.mlab.com:11419/pair-research'
dbName = 'pair-research'
client = MongoClient(uri)
db = client[dbName]
db.collection_names()

[u'affinities',
 u'meteor_accounts_loginServiceConfiguration',
 u'tasks_history',
 u'groups',
 u'users',
 u'objectlabs-system.admin.collections',
 u'pairs_history',
 u'tasks',
 u'system.indexes',
 u'pairings',
 u'objectlabs-system',
 u'affinities_history']

In [21]:
users = pd.DataFrame(list(db.users.find()))
users.head()

Unnamed: 0,_id,createdAt,emails,groups,profile,services
0,dibWQsjhkpvC52AFp,2016-08-16 15:54:28.489,"[{u'verified': False, u'address': u'hjlkadfjkl...",[],{u'fullName': u'hihi'},{u'password': {u'bcrypt': u'$2a$10$dkjBKl9Po3A...
1,BPQ7hyoHgghctHPqq,2016-08-29 18:24:50.295,"[{u'verified': True, u'address': u'egerber@nor...","[{u'isPending': False, u'groupName': u'Delta L...",{u'fullName': u'Liz Gerber'},{u'password': {u'bcrypt': u'$2a$10$Q9SnAxCEjS1...
2,bZEjadPH7KrjM9PfD,2016-11-10 19:19:34.147,"[{u'verified': False, u'address': u'ampiper@no...","[{u'isPending': True, u'groupName': u'Segal De...",{u'fullName': u'ampiper@northwestern.edu'},{u'password': {}}
3,8mRni9ixefux6bSz9,2016-12-09 01:55:36.706,"[{u'verified': False, u'address': u'hscho122@k...",[],{u'fullName': u'hscho122@kaist.ac.kr'},{u'password': {}}
4,JXCrPvRJwM5pK4Wk7,2017-01-05 07:05:46.455,"[{u'verified': False, u'address': u'artydevelo...",[],"{u'fullName': u'Deokseong', u'avatar': u'http:...",{u'password': {u'bcrypt': u'$2a$10$Obf8jHjBnkq...


In [22]:
groups = pd.DataFrame(list(db.groups.find()))
groups.head()

Unnamed: 0,_id,active,activePairing,creationDate,creatorId,creatorName,description,groupName,members,roles
0,tbyuDjH5WGMJbJE2d,True,,2016-07-11 19:09:30.520,n3acFJs3SiDorYxMS,Kevin Chen,First.,Kevin's Primordial Group,"[{u'isPending': False, u'role': {u'_id': u'xcZ...","[{u'_id': u'z9rCTyfP827Bsv7dB', u'title': u'Ra..."
1,HfsBYcpG9NgicEdZr,True,,2016-08-10 18:54:36.396,vRoD2rA6fmkZLx636,Leesha,blah,test group,"[{u'isPending': False, u'role': {u'_id': u'Auw...","[{u'_id': u'Auw8ZeNpaZkqRcwm3', u'title': u'Pr..."
2,ZzJRhB7AEd4AS9BvK,True,,2016-08-16 15:44:46.098,33333333333333333,Demo Admin,A demo pair research group,8j4uj38o5qoxbt9,[],"[{u'_id': u'naXttGNRPG6CtYNYi', u'title': u'Pr..."
3,PXqoCFnwQyLoxPKgw,True,,2016-10-20 21:22:35.309,33333333333333333,Demo Admin,A demo pair research group,k9fffpcpoxtlz0k9,[],"[{u'_id': u'SEtx2m82qF7cKHAER', u'title': u'Pr..."
4,KB5xeD6dnwo3w82dz,True,,2016-10-24 02:04:58.469,33333333333333333,Demo Admin,A demo pair research group,uj9sjlqr3rspp66r,[],"[{u'_id': u'GWWQow7F4hfF7jM26', u'title': u'Pr..."


In [23]:
tasks_history = pd.DataFrame(list(db.tasks_history.find()))
tasks_history.head()

Unnamed: 0,_id,groupId,name,pairingId,task,userId
0,MRpqhrhDq4ssfRS5e,4xEwAguXinpkMNDqR,Stella,85rSWzajxBtguuds6,swim lessons,CELY4ynWRe3b4Te9c
1,j2zmfAC3G6kjKpKe7,4xEwAguXinpkMNDqR,Kevin Chen,85rSWzajxBtguuds6,plauying,n3acFJs3SiDorYxMS
2,k4ewZSgDHsvDFkXpX,9mdkMmj4pY8Q2TwqF,Yongsung Kim,nRAQpsPhsQs4zRvTL,i need to send out a short-survey to interviewees,EDEFWcagLwCfXP5Jg
3,RZZWR8pABaJBKYNFu,9mdkMmj4pY8Q2TwqF,Julian Vicens,nRAQpsPhsQs4zRvTL,I would like to talk about different ways to m...,goGr47HDwtfphJ5xK
4,Xr3dvNreiwzq9ixrQ,9mdkMmj4pY8Q2TwqF,Spencer Carlson,nRAQpsPhsQs4zRvTL,Make educated guesses about the quality of my ...,vbsF64nAgoitwrNeB


In [24]:
pairings = pd.DataFrame(list(db.pairings.find()))
pairings.sort_values('timestamp', ascending=False).head()

Unnamed: 0,_id,groupId,pairings,timestamp
372,3f9M6GYoDodoyTB8y,SMMBkzLY5Z4kpt9LT,"[{u'secondUserId': u'HRb63v7L3bR4MRMbk', u'fir...",2018-02-02 21:57:45.916
371,DNuREv7SSh3zNqzBD,SMMBkzLY5Z4kpt9LT,"[{u'secondUserId': u'9towMubAzxYvcf83Z', u'fir...",2018-02-02 21:57:26.249
370,WCtNuhbCAQ59NJmgW,SMMBkzLY5Z4kpt9LT,"[{u'secondUserId': u'HRb63v7L3bR4MRMbk', u'fir...",2018-02-02 21:53:33.528
369,uvvvibjQYezCY5sey,SMMBkzLY5Z4kpt9LT,"[{u'secondUserId': u'3H4HJmMiEEJRrxEC5', u'fir...",2018-02-02 21:53:31.811
368,rpvvJpcJBrYhxRgK7,SMMBkzLY5Z4kpt9LT,"[{u'firstUserName': u'Shiyan Yan', u'firstUser...",2018-02-02 21:50:31.911


In [25]:
affinities = pd.DataFrame(list(db.affinities.find()))
affinities.head()

Unnamed: 0,_id,groupId,helpeeId,helperId,value
0,e6rjGWDrWE5YKxdbh,NRg4vMMoxEAqTHazP,AX8FFZHzPa8eF8bBE,SFg6T8vhT56EeCkRX,5.0
1,mSnrrMX7y26NSQ7iN,NRg4vMMoxEAqTHazP,SFg6T8vhT56EeCkRX,AX8FFZHzPa8eF8bBE,5.0
2,w72kT4Ez7xYkfE8JF,NRg4vMMoxEAqTHazP,5FjQBco6MXaSFhap4,AX8FFZHzPa8eF8bBE,1.0
3,c5xFCfvPimbBsnsGg,NRg4vMMoxEAqTHazP,hkZoyLhrWetKwep3r,AX8FFZHzPa8eF8bBE,4.0
4,bbTEQ3mvL46mTTskJ,NRg4vMMoxEAqTHazP,AX8FFZHzPa8eF8bBE,5FjQBco6MXaSFhap4,5.0


In [26]:
pairs_history = pd.DataFrame(list(db.pairs_history.find()))
pairs_history[pairs_history['groupId'] == 'SMMBkzLY5Z4kpt9LT'].head()

Unnamed: 0,_id,firstUserId,firstUserName,firstUserRole,groupId,pairingId,secondUserId,secondUserName,secondUserRole,timestamp
1547,SzGQWg2dYScJkFPJ3,Bfbyg3o67BZbTrXXR,Shiyan Yan,Graduate Student,SMMBkzLY5Z4kpt9LT,rpvvJpcJBrYhxRgK7,,,,2018-02-02 21:50:31.911
1548,RKaXBuzSxvzgeNZza,HRb63v7L3bR4MRMbk,Stephanie O'Keefe,Professor,SMMBkzLY5Z4kpt9LT,uvvvibjQYezCY5sey,3H4HJmMiEEJRrxEC5,Harman Kaur,Graduate Student,2018-02-02 21:53:31.811
1549,7akpo6Fcw8GczWeeD,Bfbyg3o67BZbTrXXR,Shiyan Yan,Graduate Student,SMMBkzLY5Z4kpt9LT,uvvvibjQYezCY5sey,,,,2018-02-02 21:53:31.811
1550,WuyMz2WgHw6zGxNTk,3H4HJmMiEEJRrxEC5,Harman Kaur,Graduate Student,SMMBkzLY5Z4kpt9LT,WCtNuhbCAQ59NJmgW,HRb63v7L3bR4MRMbk,Stephanie O'Keefe,Professor,2018-02-02 21:53:33.528
1551,4o5fyeDB8TjdJRrtK,Bfbyg3o67BZbTrXXR,Shiyan Yan,Graduate Student,SMMBkzLY5Z4kpt9LT,WCtNuhbCAQ59NJmgW,,,,2018-02-02 21:53:33.528


In [27]:
affinities_history = pd.DataFrame(list(db.affinities_history.find()))
affinities_history['group_pairing_id'] = affinities_history['groupId'] + '-' + affinities_history['pairingId']
affinities_history.head()

Unnamed: 0,_id,groupId,helpeeId,helperId,pairingId,value,group_pairing_id
0,Ae74k8Yvq3i4Koj3A,4xEwAguXinpkMNDqR,n3acFJs3SiDorYxMS,CELY4ynWRe3b4Te9c,85rSWzajxBtguuds6,0.33,4xEwAguXinpkMNDqR-85rSWzajxBtguuds6
1,6o2XqHLfzwsPfTeEL,4xEwAguXinpkMNDqR,CELY4ynWRe3b4Te9c,n3acFJs3SiDorYxMS,85rSWzajxBtguuds6,1.0,4xEwAguXinpkMNDqR-85rSWzajxBtguuds6
2,Ny2qkvoqSMQuZE865,9mdkMmj4pY8Q2TwqF,goGr47HDwtfphJ5xK,EDEFWcagLwCfXP5Jg,nRAQpsPhsQs4zRvTL,-1.0,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL
3,rMiXTvAQRsHoSbpn3,9mdkMmj4pY8Q2TwqF,vbsF64nAgoitwrNeB,EDEFWcagLwCfXP5Jg,nRAQpsPhsQs4zRvTL,-1.0,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL
4,DfqvpcJEXZ5eKgPDs,9mdkMmj4pY8Q2TwqF,goGr47HDwtfphJ5xK,vbsF64nAgoitwrNeB,nRAQpsPhsQs4zRvTL,-1.0,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL


In [28]:
def create_affinity_matrix(affinities): 
    """
    Creates an n^2 affinity matrix.
        
    Input:
        affinities (pandas DataFrame): dataframe with helpeeId, helperId, and value columns.
    
    Output:
        (list of list of numbers): matrix of affinities. 0 if no affinity between users.
    """
    # create user superset and user:index mapping
    user_superset = list(set(list(affinities['helperId']) + list(affinities['helpeeId'])))
    user_count = len(user_superset)
    user_index_dict = {user_superset[x]: x for x in range(user_count)}
    
    # create empty n^2 matrix
    affinity_matrix = [[0 for y in range(user_count)] for x in range(user_count)]
    
    # remap data values to UI values
    value_mappings = {
        '-1.0': 1,
        '0.0': 2,
        '0.33': 3,
        '0.66': 4,
        '1.0': 5
    }
    affinities['value'] = affinities['value'].astype(str)
    affinities.replace({"value": value_mappings}, inplace=True)
    affinities.drop_duplicates(inplace=True)
    
    # loop through data and populate matrix
    for index, row in affinities.iterrows():
        curr_helper_index = user_index_dict[row['helperId']]
        curr_helpee_index = user_index_dict[row['helpeeId']]
        curr_value = row['value']
        
        affinity_matrix[curr_helper_index][curr_helpee_index] = curr_value
        
    return affinity_matrix

def create_preference_matrix(affinity_matrix):
    """
    Converts an n^2 affinity matrix into a n-by-m preference matrix (where m = n - 1).
    
    Input: 
        affinity_matrix (list of list of numbers): matrix of affinities. 0 if no affinity between users.
    
    Return: 
        (list of list of numbers): preference matrix where each list is ordered list of person indices.
    """
    # create zipped lists of (index, rating)
    preference_matrix = [[(i + 1, value) for i, value in enumerate(x)] for x in affinity_matrix]
    
    # format each row
    for index, curr_person in enumerate(preference_matrix):
        curr_person.sort(key=lambda tup: tup[1], reverse=True)
        preference_matrix[index] = [person_rating[0] for person_rating in curr_person if person_rating[0] - 1 != index]
        
    return preference_matrix

In [29]:
stable_count = 0
unstable_count = 0
total = 0

for group_pair_id in affinities_history.group_pairing_id.unique():
    # get data and create affinity + preference matrices
    curr_data = affinities_history[affinities_history['group_pairing_id'] == group_pair_id][['helperId', 'helpeeId', 'value']]
    curr_affinity_matrix = create_affinity_matrix(curr_data)
    curr_pref_matrix = create_preference_matrix(curr_affinity_matrix)
    
    # run stable roommates
    if stable_roommates(curr_pref_matrix) is not None:
        stable_count += 1
    else:
        unstable_count += 1
    total += 1

In [30]:
print('Stable Count: {}, Unstable Count: {}, Total: {} | Proportion: {}%'.format(stable_count, unstable_count, total, round(100 * stable_count / total, 2)))

Stable Count: 197, Unstable Count: 101, Total: 298 | Proportion: 66.11%
