# Stable Matching Pair Research
Implementation of [Stable Roomates](http://www.dcs.gla.ac.uk/~pat/jchoco/roommates/papers/Comp_sdarticle.pdf) for [Pair Research](http://pairresearch.io/). 

# Load in Libraries and Stable Roommates Matching Module

In [1]:
%load_ext autoreload
%autoreload 2

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from copy import deepcopy

# load stable roommates code
from stable_roommates import stable_matching_wrapper as sr_matching

## Stable Roommates Matching Test Cases
A variety of test cases from (1) Irving's paper, (2) Wikipedia, (3) external implementations, and (4) any other custom cases.

### Test Case from Irving's Paper

In [2]:
paper_matching_6 = [
    [4, 6, 2, 5, 3],
    [6, 3, 5, 1, 4],
    [4, 5, 1, 6, 2],
    [2, 6, 5, 1, 3],
    [4, 2, 3, 6, 1],
    [5, 1, 4, 2, 3]
]

paper_matching_8 = [
    [2, 5, 4, 6, 7, 8, 3],
    [3, 6, 1, 7, 8, 5, 4],
    [4, 7, 2, 8, 5, 6, 1],
    [1, 8, 3, 5, 6, 7, 2],
    [6, 1, 8, 2, 3, 4, 7],
    [7, 2, 5, 3, 4, 1, 8],
    [8, 3, 6, 4, 1, 2, 5],
    [5, 4, 7, 1, 2, 3, 6]
]

paper_no_matching_4 = [
    [2, 3, 4],
    [3, 1, 4],
    [1, 2, 4],
    [1, 2, 3]
]

paper_no_matching_6 = [
    [2, 6, 4, 3, 5],
    [3, 5, 1, 6, 4],
    [1, 6, 2, 5, 4],
    [5, 2, 3, 6, 1],
    [6, 1, 3, 4, 2],
    [4, 2, 5, 1, 3]
]

In [3]:
sr_matching(paper_matching_6, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['4', '6', '2', '5', '3'], '2': ['6', '3', '5', '1', '4'], '3': ['4', '5', '1', '6', '2'], '4': ['2', '6', '5', '1', '3'], '5': ['4', '2', '3', '6', '1'], '6': ['5', '1', '4', '2', '3']}
Ranks Dict: {'1': {'4': 0, '6': 1, '2': 2, '5': 3, '3': 4}, '2': {'6': 0, '3': 1, '5': 2, '1': 3, '4': 4}, '3': {'4': 0, '5': 1, '1': 2, '6': 3, '2': 4}, '4': {'2': 0, '6': 1, '5': 2, '1': 3, '3': 4}, '5': {'4': 0, '2': 1, '3': 2, '6': 3, '1': 4}, '6': {'5': 0, '1': 1, '4': 2, '2': 3, '3': 4}}
Stable matching found. Returning person : partner dictionary.
{'1': '6', '2': '3', '3': '2', '4': '5', '5': '4', '6': '1'}


([5, 2, 1, 4, 3, 0], 'Stable matching found after Phase 2.')

In [4]:
sr_matching(paper_matching_8, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['2', '5', '4', '6', '7', '8', '3'], '2': ['3', '6', '1', '7', '8', '5', '4'], '3': ['4', '7', '2', '8', '5', '6', '1'], '4': ['1', '8', '3', '5', '6', '7', '2'], '5': ['6', '1', '8', '2', '3', '4', '7'], '6': ['7', '2', '5', '3', '4', '1', '8'], '7': ['8', '3', '6', '4', '1', '2', '5'], '8': ['5', '4', '7', '1', '2', '3', '6']}
Ranks Dict: {'1': {'2': 0, '5': 1, '4': 2, '6': 3, '7': 4, '8': 5, '3': 6}, '2': {'3': 0, '6': 1, '1': 2, '7': 3, '8': 4, '5': 5, '4': 6}, '3': {'4': 0, '7': 1, '2': 2, '8': 3, '5': 4, '6': 5, '1': 6}, '4': {'1': 0, '8': 1, '3': 2, '5': 3, '6': 4, '7': 5, '2': 6}, '5': {'6': 0, '1': 1, '8': 2, '2': 3, '3': 4, '4': 5, '7': 6}, '6': {'7': 0, '2': 1, '5': 2, '3': 3, '4': 4, '1': 5, '8': 6}, '7': {'8': 0, '3': 1, '6': 2, '4': 3, '1': 4, '2': 5, '5': 6}, '8': {'5': 0, '4': 1, '7': 2, '1': 3, '2': 4, '3': 5, '6': 6}}
Stable matching found. Returning person : partner dictionary.
{'1': '4', '2': '3', '3': '2', '4': '1', '

([3, 2, 1, 0, 5, 4, 7, 6], 'Stable matching found after Phase 2.')

In [5]:
sr_matching(paper_no_matching_4, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['2', '3', '4'], '2': ['3', '1', '4'], '3': ['1', '2', '4'], '4': ['1', '2', '3']}
Ranks Dict: {'1': {'2': 0, '3': 1, '4': 2}, '2': {'3': 0, '1': 1, '4': 2}, '3': {'1': 0, '2': 1, '4': 2}, '4': {'1': 0, '2': 1, '3': 2}}
Stable matching is not possible. Failed at Phase 1: not everyone was proposed to.


(None, 'Failed at Phase 1: not everyone was proposed to.')

In [6]:
sr_matching(paper_no_matching_6, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['2', '6', '4', '3', '5'], '2': ['3', '5', '1', '6', '4'], '3': ['1', '6', '2', '5', '4'], '4': ['5', '2', '3', '6', '1'], '5': ['6', '1', '3', '4', '2'], '6': ['4', '2', '5', '1', '3']}
Ranks Dict: {'1': {'2': 0, '6': 1, '4': 2, '3': 3, '5': 4}, '2': {'3': 0, '5': 1, '1': 2, '6': 3, '4': 4}, '3': {'1': 0, '6': 1, '2': 2, '5': 3, '4': 4}, '4': {'5': 0, '2': 1, '3': 2, '6': 3, '1': 4}, '5': {'6': 0, '1': 1, '3': 2, '4': 3, '2': 4}, '6': {'4': 0, '2': 1, '5': 2, '1': 3, '3': 4}}
Stable matching is not possible. Failed at Phase 2: could not find an all-or-nothing cycle len > 3.


(None, 'Failed at Phase 2: could not find an all-or-nothing cycle len > 3.')

### Test Cases from Wikipedia Article (https://en.wikipedia.org/wiki/Stable_roommates_problem#Algorithm)

In [7]:
wiki_matching_6 = [
    [3, 4, 2, 6, 5],
    [6, 5, 4, 1, 3],
    [2, 4, 5, 1, 6],
    [5, 2, 3, 6, 1],
    [3, 1, 2, 4, 6],
    [5, 1, 3, 4, 2]
]

In [8]:
sr_matching(wiki_matching_6, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['3', '4', '2', '6', '5'], '2': ['6', '5', '4', '1', '3'], '3': ['2', '4', '5', '1', '6'], '4': ['5', '2', '3', '6', '1'], '5': ['3', '1', '2', '4', '6'], '6': ['5', '1', '3', '4', '2']}
Ranks Dict: {'1': {'3': 0, '4': 1, '2': 2, '6': 3, '5': 4}, '2': {'6': 0, '5': 1, '4': 2, '1': 3, '3': 4}, '3': {'2': 0, '4': 1, '5': 2, '1': 3, '6': 4}, '4': {'5': 0, '2': 1, '3': 2, '6': 3, '1': 4}, '5': {'3': 0, '1': 1, '2': 2, '4': 3, '6': 4}, '6': {'5': 0, '1': 1, '3': 2, '4': 3, '2': 4}}
Stable matching found. Returning person : partner dictionary.
{'1': '6', '2': '4', '3': '5', '4': '2', '5': '3', '6': '1'}


([5, 3, 4, 1, 2, 0], 'Stable matching found after Phase 2.')

### Test Cases from External Implementation (http://www.dcs.gla.ac.uk/~pat/roommates/distribution/data/) 

In [9]:
external_matching_8 = [
    [2, 5, 4, 6, 7, 8, 3],
    [3, 6, 1, 7, 8, 5, 4],
    [4, 7, 2, 8, 5, 6, 1],
    [1, 8, 3, 5, 6, 7, 2],
    [6, 1, 8, 2, 3, 4, 7],
    [7, 2, 5, 3, 4, 1, 8],
    [8, 3, 6, 4, 1, 2, 5],
    [5, 4, 7, 1, 2, 3, 6]
]

external_matching_10 = [
    [8, 2, 9, 3, 6, 4, 5, 7, 10],
    [4, 3, 8, 9, 5, 1, 10, 6, 7],
    [5, 6, 8, 2, 1, 7, 10, 4, 9],
    [10, 7, 9, 3, 1, 6, 2, 5, 8],
    [7, 4, 10, 8, 2, 6, 3, 1, 9],
    [2, 8, 7, 3, 4, 10, 1, 5, 9],
    [2, 1, 8, 3, 5, 10, 4, 6, 9],
    [10, 4, 2, 5, 6, 7, 1, 3, 9],
    [6, 7, 2, 5, 10, 3, 4, 8, 1],
    [3, 1, 6, 5, 2, 9, 8, 4, 7]
]

external_matching_20 = [
    [13, 12, 20, 17, 11, 6, 8, 2, 3, 14, 4, 16, 5, 10, 18, 19, 9, 15, 7],
    [13, 6, 8, 17, 18, 19, 1, 11, 7, 4, 15, 16, 5, 9, 3, 20, 12, 10, 14],
    [6, 16, 4, 9, 14, 13, 17, 19, 8, 2, 1, 12, 20, 5, 18, 15, 7, 11, 10],
    [11, 7, 8, 2, 17, 3, 15, 6, 19, 10, 9, 5, 1, 16, 13, 20, 18, 14, 12],
    [8, 17, 14, 16, 4, 13, 15, 6, 19, 9, 12, 7, 2, 3, 11, 18, 20, 10, 1],
    [8, 13, 10, 14, 18, 15, 2, 7, 4, 16, 19, 5, 9, 17, 20, 3, 11, 12, 1],
    [13, 1, 4, 9, 19, 18, 11, 14, 10, 2, 17, 6, 15, 16, 5, 3, 12, 8, 20],
    [1, 6, 20, 7, 5, 15, 19, 4, 12, 3, 17, 9, 10, 14, 16, 2, 18, 11, 13],
    [17, 13, 3, 5, 7, 4, 12, 2, 18, 20, 15, 8, 10, 1, 6, 11, 19, 14, 16],
    [9, 4, 16, 14, 18, 17, 15, 11, 20, 13, 3, 12, 2, 1, 19, 7, 5, 8, 6],
    [6, 15, 4, 1, 18, 14, 5, 3, 9, 2, 17, 13, 8, 7, 12, 20, 19, 10, 16],
    [5, 18, 7, 16, 6, 20, 19, 14, 9, 17, 3, 1, 8, 10, 11, 13, 2, 15, 4],
    [3, 10, 7, 18, 14, 15, 1, 6, 12, 4, 8, 19, 16, 17, 5, 20, 9, 11, 2],
    [2, 5, 10, 13, 19, 17, 6, 3, 18, 7, 20, 9, 1, 4, 16, 12, 15, 8, 11],
    [12, 13, 5, 11, 2, 16, 18, 14, 1, 6, 17, 8, 19, 4, 10, 7, 20, 3, 9],
    [1, 7, 6, 5, 14, 18, 12, 17, 20, 11, 15, 10, 2, 13, 3, 8, 19, 9, 4],
    [5, 8, 15, 9, 7, 18, 11, 10, 19, 2, 1, 12, 3, 14, 20, 13, 6, 16, 4],
    [14, 3, 8, 10, 13, 5, 9, 15, 12, 1, 17, 6, 16, 11, 2, 7, 4, 19, 20],
    [9, 15, 20, 12, 18, 1, 11, 5, 3, 2, 13, 14, 10, 7, 6, 16, 8, 17, 4],
    [5, 6, 18, 19, 16, 7, 4, 9, 2, 17, 8, 15, 1, 12, 13, 10, 14, 3, 11]
]

# matching exists if algorithm leaves 7 unmatched
external_matching_7 = [
    [3, 4, 2, 6, 5, 7], 
    [6, 5, 4, 1, 3, 7], 
    [2, 4, 5, 1, 6, 7], 
    [5, 2, 3, 6, 1, 7],
    [3, 1, 2, 4, 6, 7],
    [5, 1, 3, 4, 2, 7],
    [1, 2, 3, 4, 5, 6]
]

In [10]:
sr_matching(external_matching_8, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['2', '5', '4', '6', '7', '8', '3'], '2': ['3', '6', '1', '7', '8', '5', '4'], '3': ['4', '7', '2', '8', '5', '6', '1'], '4': ['1', '8', '3', '5', '6', '7', '2'], '5': ['6', '1', '8', '2', '3', '4', '7'], '6': ['7', '2', '5', '3', '4', '1', '8'], '7': ['8', '3', '6', '4', '1', '2', '5'], '8': ['5', '4', '7', '1', '2', '3', '6']}
Ranks Dict: {'1': {'2': 0, '5': 1, '4': 2, '6': 3, '7': 4, '8': 5, '3': 6}, '2': {'3': 0, '6': 1, '1': 2, '7': 3, '8': 4, '5': 5, '4': 6}, '3': {'4': 0, '7': 1, '2': 2, '8': 3, '5': 4, '6': 5, '1': 6}, '4': {'1': 0, '8': 1, '3': 2, '5': 3, '6': 4, '7': 5, '2': 6}, '5': {'6': 0, '1': 1, '8': 2, '2': 3, '3': 4, '4': 5, '7': 6}, '6': {'7': 0, '2': 1, '5': 2, '3': 3, '4': 4, '1': 5, '8': 6}, '7': {'8': 0, '3': 1, '6': 2, '4': 3, '1': 4, '2': 5, '5': 6}, '8': {'5': 0, '4': 1, '7': 2, '1': 3, '2': 4, '3': 5, '6': 6}}
Stable matching found. Returning person : partner dictionary.
{'1': '4', '2': '3', '3': '2', '4': '1', '

([3, 2, 1, 0, 5, 4, 7, 6], 'Stable matching found after Phase 2.')

In [11]:
sr_matching(external_matching_10, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['8', '2', '9', '3', '6', '4', '5', '7', '10'], '2': ['4', '3', '8', '9', '5', '1', '10', '6', '7'], '3': ['5', '6', '8', '2', '1', '7', '10', '4', '9'], '4': ['10', '7', '9', '3', '1', '6', '2', '5', '8'], '5': ['7', '4', '10', '8', '2', '6', '3', '1', '9'], '6': ['2', '8', '7', '3', '4', '10', '1', '5', '9'], '7': ['2', '1', '8', '3', '5', '10', '4', '6', '9'], '8': ['10', '4', '2', '5', '6', '7', '1', '3', '9'], '9': ['6', '7', '2', '5', '10', '3', '4', '8', '1'], '10': ['3', '1', '6', '5', '2', '9', '8', '4', '7']}
Ranks Dict: {'1': {'8': 0, '2': 1, '9': 2, '3': 3, '6': 4, '4': 5, '5': 6, '7': 7, '10': 8}, '2': {'4': 0, '3': 1, '8': 2, '9': 3, '5': 4, '1': 5, '10': 6, '6': 7, '7': 8}, '3': {'5': 0, '6': 1, '8': 2, '2': 3, '1': 4, '7': 5, '10': 6, '4': 7, '9': 8}, '4': {'10': 0, '7': 1, '9': 2, '3': 3, '1': 4, '6': 5, '2': 6, '5': 7, '8': 8}, '5': {'7': 0, '4': 1, '10': 2, '8': 3, '2': 4, '6': 5, '3': 6, '1': 7, '9': 8}, '6': {'2': 0, 

([6, 7, 5, 8, 9, 2, 0, 1, 3, 4], 'Stable matching found after Phase 2.')

In [12]:
sr_matching(external_matching_20, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['13', '12', '20', '17', '11', '6', '8', '2', '3', '14', '4', '16', '5', '10', '18', '19', '9', '15', '7'], '2': ['13', '6', '8', '17', '18', '19', '1', '11', '7', '4', '15', '16', '5', '9', '3', '20', '12', '10', '14'], '3': ['6', '16', '4', '9', '14', '13', '17', '19', '8', '2', '1', '12', '20', '5', '18', '15', '7', '11', '10'], '4': ['11', '7', '8', '2', '17', '3', '15', '6', '19', '10', '9', '5', '1', '16', '13', '20', '18', '14', '12'], '5': ['8', '17', '14', '16', '4', '13', '15', '6', '19', '9', '12', '7', '2', '3', '11', '18', '20', '10', '1'], '6': ['8', '13', '10', '14', '18', '15', '2', '7', '4', '16', '19', '5', '9', '17', '20', '3', '11', '12', '1'], '7': ['13', '1', '4', '9', '19', '18', '11', '14', '10', '2', '17', '6', '15', '16', '5', '3', '12', '8', '20'], '8': ['1', '6', '20', '7', '5', '15', '19', '4', '12', '3', '17', '9', '10', '14', '16', '2', '18', '11', '13'], '9': ['17', '13', '3', '5', '7', '4', '12', '2', '18'

([7, 3, 8, 1, 16, 13, 12, 0, 2, 15, 14, 17, 6, 5, 10, 9, 4, 11, 19, 18],
 'Stable matching found after Phase 2.')

In [13]:
sr_matching(external_matching_7, handle_odd_method='remove', debug=True)

Input validation passed.
Removing person 1 (matrix index), 2 (dict index)
Preference Dict: {'1': ['3', '4', '6', '5', '7'], '3': ['4', '5', '1', '6', '7'], '4': ['5', '3', '6', '1', '7'], '5': ['3', '1', '4', '6', '7'], '6': ['5', '1', '3', '4', '7'], '7': ['1', '3', '4', '5', '6']}
Ranks Dict: {'1': {'3': 0, '4': 1, '6': 2, '5': 3, '7': 4}, '3': {'4': 0, '5': 1, '1': 2, '6': 3, '7': 4}, '4': {'5': 0, '3': 1, '6': 2, '1': 3, '7': 4}, '5': {'3': 0, '1': 1, '4': 2, '6': 3, '7': 4}, '6': {'5': 0, '1': 1, '3': 2, '4': 3, '7': 4}, '7': {'1': 0, '3': 1, '4': 2, '5': 3, '6': 4}}
Stable matching is not possible. Failed at Phase 1: not everyone was proposed to.
Removing person 3 (matrix index), 4 (dict index)
Preference Dict: {'1': ['3', '2', '6', '5', '7'], '2': ['6', '5', '1', '3', '7'], '3': ['2', '5', '1', '6', '7'], '5': ['3', '1', '2', '6', '7'], '6': ['5', '1', '3', '2', '7'], '7': ['1', '2', '3', '5', '6']}
Ranks Dict: {'1': {'3': 0, '2': 1, '6': 2, '5': 3, '7': 4}, '2': {'6': 0, '5': 1

([5, 3, 4, 1, 2, 0, -1], 'Stable matching found after Phase 2.')

### Custom Test Cases

In [14]:
# empty matrix
custom_no_matching_empty = []

# one person (no matching should be possible)
custom_no_matching_1 = [[]]

# two people
custom_matching_2 = [[2], [1]]

# three people (odd: should add person and find a matching)
custom_matching_3 = [
    [3, 2],
    [3, 1],
    [1, 2]
]

In [15]:
sr_matching(custom_no_matching_empty, handle_odd_method='remove', debug=True)

Input validation failed: preference_matrix must have size > 1
Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.


(None,
 'Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.')

In [16]:
sr_matching(custom_no_matching_1, handle_odd_method='remove', debug=True)

Input validation failed: preference_matrix must have size > 1
Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.


(None,
 'Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.')

In [17]:
sr_matching(custom_matching_2, handle_odd_method='remove', debug=True)

Input validation passed.
Preference Dict: {'1': ['2'], '2': ['1']}
Ranks Dict: {'1': {'2': 0}, '2': {'1': 0}}
Stable matching found. Returning person : partner dictionary.
{'1': '2', '2': '1'}


([1, 0], 'Stable matching found after Phase 1.')

In [18]:
sr_matching(custom_matching_3, handle_odd_method='remove', debug=True)

Input validation passed.
Removing person 1 (matrix index), 2 (dict index)
Preference Dict: {'1': ['3'], '3': ['1']}
Ranks Dict: {'1': {'3': 0}, '3': {'1': 0}}
Stable matching found. Returning person : partner dictionary.
{'1': '3', '3': '1', '2': '-1'}


([2, -1, 0], 'Stable matching found after Phase 1.')

# Analysis of Stable Roommates Matching on Pair Research Data
Below, we analyze the impact of using the Stable Roommates algorithm on previous pairings. 

We begin by seeing
1. How frequently can we find stable matchings? 
2. When stable matchings are not possible, for what reason do they fail?

## Fetch Pairing Data from [pairresearch.io](http://pairresearch.io/)

In [19]:
import multiprocessing as mp
import random
import math

import pandas as pd
from pymongo import MongoClient

import seaborn as sns
%matplotlib inline

In [20]:
uri = 'mongodb://delta:delta@ds011419.mlab.com:11419/pair-research'
dbName = 'pair-research'
client = MongoClient(uri)
db = client[dbName]
db.collection_names()

['affinities',
 'meteor_accounts_loginServiceConfiguration',
 'tasks_history',
 'groups',
 'users',
 'objectlabs-system.admin.collections',
 'pairs_history',
 'tasks',
 'system.indexes',
 'pairings',
 'objectlabs-system',
 'affinities_history']

In [21]:
users = pd.DataFrame(list(db.users.find({})))

print('Number of Users: {}'.format(len(users)))
users.head()

Number of Users: 876


Unnamed: 0,_id,createdAt,emails,groups,profile,services
0,dibWQsjhkpvC52AFp,2016-08-16 15:54:28.489,"[{'address': 'hjlkadfjkl@32897.coj', 'verified...",[],{'fullName': 'hihi'},{'password': {'bcrypt': '$2a$10$dkjBKl9Po3AINK...
1,BPQ7hyoHgghctHPqq,2016-08-29 18:24:50.295,"[{'address': 'egerber@northwestern.edu', 'veri...","[{'groupId': '9mdkMmj4pY8Q2TwqF', 'role': {'_i...",{'fullName': 'Liz Gerber'},{'password': {'bcrypt': '$2a$10$Q9SnAxCEjS1V92...
2,bZEjadPH7KrjM9PfD,2016-11-10 19:19:34.147,"[{'address': 'ampiper@northwestern.edu', 'veri...","[{'groupId': 'qPnf2DHHihugATnxD', 'role': {'_i...",{'fullName': 'ampiper@northwestern.edu'},{'password': {}}
3,8mRni9ixefux6bSz9,2016-12-09 01:55:36.706,"[{'address': 'hscho122@kaist.ac.kr', 'verified...",[],{'fullName': 'hscho122@kaist.ac.kr'},{'password': {}}
4,JXCrPvRJwM5pK4Wk7,2017-01-05 07:05:46.455,"[{'address': 'artydeveloperduck@gmail.com', 'v...",[],"{'fullName': 'Deokseong', 'avatar': 'http://or...",{'password': {'bcrypt': '$2a$10$Obf8jHjBnkqLT4...


In [22]:
groups = pd.DataFrame(list(db.groups.find({})))

# remove testing groups
group_creator_ignore_list = ['Demo Admin', 'ykykykykykykykykykyk', 'Stella', 'Kevin Northwestern',
                             'Kevin Chen', 'Leesha', 'Jennie']
group_ignore_ids = groups[groups['creatorName'].isin(group_creator_ignore_list)]['_id'].unique()

# subset groups by id
groups_orig_size = len(groups)
groups_new_size = 0

groups = groups[~groups['_id'].isin(group_ignore_ids)]
groups.reset_index(drop=True, inplace=True)

# print change in size
groups_new_size = len(groups)
print('Original size: {} --> New size: {}'.format(groups_orig_size, groups_new_size))

# display task history
groups.head()

Original size: 444 --> New size: 59


Unnamed: 0,_id,active,activePairing,creationDate,creatorId,creatorName,description,groupName,members,roles
0,uPLDbfFqqdHEEkgCT,True,,2016-08-10 18:55:16.164,goGr47HDwtfphJ5xK,Julian Vicens,Rock and Roll Band,Beatles,"[{'fullName': 'Julian Vicens', 'userId': 'goGr...","[{'title': 'Guitar', '_id': 'oB3qMqXdTJNqR6vbZ..."
1,Et46F6odTBmiFiDSZ,True,nnN46Abcc78AAtqKf,2016-07-18 21:21:54.117,NtZ9hv3g6eLAwN2nY,Joe Germuska,Knight Lab taking Pair Research for a spin,Knight Lab Testing,"[{'fullName': 'Joe Germuska', 'userId': 'NtZ9h...","[{'title': 'Admin', '_id': 's2JKkhE9XC6GPW5ev'..."
2,kY7xHo6c5m5tCiQMH,False,,2016-09-28 19:17:10.709,u2GAvznbx7Jbf97Hk,Emily Withrow,Thursdays at 2:30,Knight Lab Pair Research,"[{'fullName': 'Emily Withrow', 'userId': 'u2GA...","[{'title': 'Professor', '_id': 'q3PJXDZpMMhcZB..."
3,KEo62WdN5WSkHa9Hh,False,,2016-09-29 15:15:15.184,u2GAvznbx7Jbf97Hk,Emily Withrow,Thursdays at 2:30,Knight Lab Pair Research,"[{'fullName': 'Emily Withrow', 'userId': 'u2GA...","[{'title': 'Professor', '_id': '6L6YwxgDwpqgoY..."
4,qPnf2DHHihugATnxD,True,x5nm2GgMvdjGwyK9Y,2016-11-10 18:38:04.379,PavTL8zD9664wvtfB,Haoqi Zhang,an intellectual community for design faculty a...,Segal Design Cluster,"[{'fullName': 'Haoqi Zhang', 'userId': 'PavTL8...","[{'title': 'Professor', '_id': 'sSNgzD6So2kz95..."


In [23]:
tasks_history = pd.DataFrame(list(db.tasks_history.find({})))

# remove bad groups
tasks_history_orig_size = len(tasks_history)
tasks_history_new_size = 0

tasks_history = tasks_history[~tasks_history['groupId'].isin(group_ignore_ids)]
tasks_history.reset_index(drop=True, inplace=True)

# print change in size
tasks_history_new_size = len(tasks_history)
print('Original size: {} --> New size: {}'.format(tasks_history_orig_size, tasks_history_new_size))

# display task history
tasks_history.head()

Original size: 2625 --> New size: 2613


Unnamed: 0,_id,groupId,name,pairingId,task,userId
0,k4ewZSgDHsvDFkXpX,9mdkMmj4pY8Q2TwqF,Yongsung Kim,nRAQpsPhsQs4zRvTL,i need to send out a short-survey to interviewees,EDEFWcagLwCfXP5Jg
1,RZZWR8pABaJBKYNFu,9mdkMmj4pY8Q2TwqF,Julian Vicens,nRAQpsPhsQs4zRvTL,I would like to talk about different ways to m...,goGr47HDwtfphJ5xK
2,Xr3dvNreiwzq9ixrQ,9mdkMmj4pY8Q2TwqF,Spencer Carlson,nRAQpsPhsQs4zRvTL,Make educated guesses about the quality of my ...,vbsF64nAgoitwrNeB
3,dFpfXT8szHkp2pYgG,9mdkMmj4pY8Q2TwqF,Leesha,nRAQpsPhsQs4zRvTL,I need help planning a latency handling featur...,aNdSTecskgeAm2St5
4,zEMk9HQo9azvKzDye,9mdkMmj4pY8Q2TwqF,Eureka Foong,nRAQpsPhsQs4zRvTL,Installing a program using Terminal (I'm bad a...,JaEySKdKKg7LAF3Yg


In [24]:
pairings = pd.DataFrame(list(db.pairings.find({})))

# remove bad groups
pairings_orig_size = len(pairings)
pairings_new_size = 0

pairings = pairings[~pairings['groupId'].isin(group_ignore_ids)]

# add group_pair id
pairings['group_pair_id'] = pairings['groupId'] + '-' + pairings['_id']
pairings.reset_index(drop=True, inplace=True)

# print change in size
pairings_new_size = len(pairings)
print('Original size: {} --> New size: {}'.format(pairings_orig_size, pairings_new_size))

# display current pairings
print('Pairing count: {}, Unique group count: {}'.format(len(pairings), len(pairings.groupId.unique())))
pairings.sort_values('timestamp', ascending=True).head()

Original size: 437 --> New size: 369
Pairing count: 369, Unique group count: 38


Unnamed: 0,_id,groupId,pairings,timestamp,group_pair_id
38,N23iLvjp2GWcsHYd5,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'zBZSGgrZFfW5KH5vj', 'firstUs...",2016-08-05 20:14:57.480,9mdkMmj4pY8Q2TwqF-N23iLvjp2GWcsHYd5
0,soiecrpv6CRPTqmkd,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'PavTL8zD9664wvtfB', 'firstUs...",2016-08-29 18:22:48.499,9mdkMmj4pY8Q2TwqF-soiecrpv6CRPTqmkd
1,e3PQuthB9woF8koC8,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'PavTL8zD9664wvtfB', 'firstUs...",2016-08-29 18:23:39.896,9mdkMmj4pY8Q2TwqF-e3PQuthB9woF8koC8
9,7BpbSGW9YSvqN3sgx,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'EDEFWcagLwCfXP5Jg', 'firstUs...",2016-09-02 19:12:46.689,9mdkMmj4pY8Q2TwqF-7BpbSGW9YSvqN3sgx
10,vskS7yWgLPkk7jYq2,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'EDEFWcagLwCfXP5Jg', 'firstUs...",2016-09-06 19:19:40.448,9mdkMmj4pY8Q2TwqF-vskS7yWgLPkk7jYq2


In [25]:
pairs_history = pd.DataFrame(list(db.pairs_history.find({})))

# remove bad groups
pairs_history_orig_size = len(pairs_history)
pairs_history_new_size = 0

pairs_history = pairs_history[~pairs_history['groupId'].isin(group_ignore_ids)]

# add group_pairing_id column
pairs_history['group_pairing_id'] = pairs_history['groupId'] + '-' + pairs_history['pairingId']
pairs_history.reset_index(drop=True, inplace=True)

# print change in size
pairs_history_new_size = len(pairs_history)
print('Original size: {} --> New size: {}'.format(pairs_history_orig_size, pairs_history_new_size))

# display current pairs_history
print('Unique group count: {}, Unique pairing count: {}'.format(len(pairs_history.groupId.unique()), 
                                                                len(pairs_history.group_pairing_id.unique())))
pairs_history.sort_values('timestamp', ascending=True).head()

Original size: 1915 --> New size: 1907
Unique group count: 38, Unique pairing count: 369


Unnamed: 0,_id,firstUserId,firstUserName,firstUserRole,groupId,pairingId,secondUserId,secondUserName,secondUserRole,timestamp,group_pairing_id
0,SSL2EMkRW4CHf66KE,xCnLbAobcKwPq7RD5,Rob Miller,Admin,9mdkMmj4pY8Q2TwqF,ctPEz48CJqcA54YeD,5pimyGfESMe3ctdSa,HQ test,PhD Students,2016-08-01 18:55:00.107,9mdkMmj4pY8Q2TwqF-ctPEz48CJqcA54YeD
1,x78xDiybFqDgEvNKY,PavTL8zD9664wvtfB,Haoqi Zhang,Admin,9mdkMmj4pY8Q2TwqF,nRAQpsPhsQs4zRvTL,KYnkykoMwd9fbBbWB,Julie Hui,Admin,2016-08-01 18:55:00.232,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL
2,2iJAAApLAmipkui2d,gynuaAvfp3gAd4Gyo,eharburg@gmail.com,Admin,9mdkMmj4pY8Q2TwqF,nRAQpsPhsQs4zRvTL,MJkj24zXWKhnZQCc3,Daniel George Rees Lewis,Admin,2016-08-01 18:55:00.298,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL
3,vP4N8EnXMHPkcCpsH,aNdSTecskgeAm2St5,Leesha,Admin,9mdkMmj4pY8Q2TwqF,nRAQpsPhsQs4zRvTL,EDEFWcagLwCfXP5Jg,Yongsung Kim,Admin,2016-08-01 18:55:00.301,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL
4,cgPA9iLvkf3bb8Smn,6iR9Z64HEJDcD8qbu,Matt Easterday,Admin,9mdkMmj4pY8Q2TwqF,nRAQpsPhsQs4zRvTL,JaEySKdKKg7LAF3Yg,Eureka Foong,Admin,2016-08-01 18:55:00.305,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL


In [26]:
tasks = pd.DataFrame(list(db.tasks.find({})))

# remove bad groups
tasks_orig_size = len(tasks)
tasks_new_size = 0

tasks = tasks[~tasks['groupId'].isin(group_ignore_ids)]
tasks.reset_index(drop=True, inplace=True)

# print change in size
tasks_new_size = len(tasks)
print('Original size: {} --> New size: {}'.format(tasks_orig_size, tasks_new_size))

# display current tasks
tasks.head()

Original size: 1026 --> New size: 887


Unnamed: 0,_id,groupId,name,task,userId
0,kcrr49h2nqnd4zthw,Caei5ywbviEaF44TS,kchen,ihih,AX8FFZHzPa8eF8bBE
1,N4MWm7c8tTf9LZrZ5,NRg4vMMoxEAqTHazP,kchen,I need help with testing pair research,AX8FFZHzPa8eF8bBE
2,juYeYQAt5iNm64iJs,NRg4vMMoxEAqTHazP,ryan,Meteor cordova enterprise push notifications,SFg6T8vhT56EeCkRX
3,qFodnk9mikQF2SvHd,NRg4vMMoxEAqTHazP,shannon,Fixing my laptop screen,5FjQBco6MXaSFhap4
4,yYtwJsrNaYwFpBuvw,NRg4vMMoxEAqTHazP,katiegeorge,meatspace help,hkZoyLhrWetKwep3r


In [27]:
affinities = pd.DataFrame(list(db.affinities.find({})))

# remove bad groups
affinities_orig_size = len(affinities)
affinities_new_size = 0

affinities = affinities[~affinities['groupId'].isin(group_ignore_ids)]
affinities.reset_index(drop=True, inplace=True)

# print change in size
affinities_new_size = len(affinities)
print('Original size: {} --> New size: {}'.format(affinities_orig_size, affinities_new_size))

# display current affinities
affinities.head()

Original size: 2972 --> New size: 2930


Unnamed: 0,_id,groupId,helpeeId,helperId,value
0,e6rjGWDrWE5YKxdbh,NRg4vMMoxEAqTHazP,AX8FFZHzPa8eF8bBE,SFg6T8vhT56EeCkRX,5.0
1,mSnrrMX7y26NSQ7iN,NRg4vMMoxEAqTHazP,SFg6T8vhT56EeCkRX,AX8FFZHzPa8eF8bBE,5.0
2,w72kT4Ez7xYkfE8JF,NRg4vMMoxEAqTHazP,5FjQBco6MXaSFhap4,AX8FFZHzPa8eF8bBE,1.0
3,c5xFCfvPimbBsnsGg,NRg4vMMoxEAqTHazP,hkZoyLhrWetKwep3r,AX8FFZHzPa8eF8bBE,4.0
4,bbTEQ3mvL46mTTskJ,NRg4vMMoxEAqTHazP,AX8FFZHzPa8eF8bBE,5FjQBco6MXaSFhap4,5.0


In [28]:
affinities_history = pd.DataFrame(list(db.affinities_history.find({})))

# remove bad groups
affinities_history_orig_size = len(affinities_history)
affinities_history_new_size = 0

affinities_history = affinities_history[~affinities_history['groupId'].isin(group_ignore_ids)]

# add group_pairing_id column
affinities_history['group_pairing_id'] = affinities_history['groupId'] + '-' + affinities_history['pairingId']

# remove duplicate ratings
affinities_history.sort_values(['group_pairing_id', 'helpeeId', 'helperId'], inplace=True)
affinities_history.drop_duplicates(subset=['group_pairing_id', 'helpeeId', 'helperId'], keep='first', inplace=True)
affinities_history.reset_index(drop=True, inplace=True)

# print change in size
affinities_history_new_size = len(affinities_history)
print('Original size: {} --> New size: {}'.format(affinities_history_orig_size, affinities_history_new_size))

# display affinity data
print('Unique Group Pairings: {}'.format(len(affinities_history.group_pairing_id.unique())))
affinities_history.head()

Original size: 31212 --> New size: 30942
Unique Group Pairings: 355


Unnamed: 0,_id,groupId,helpeeId,helperId,pairingId,value,group_pairing_id
0,v3nKkg77Jouf6BZ8G,2rFoGTfRa9LFdpQNA,3si95Pn6NjXTxCWcT,GLTz7m8y7RqZCYzxx,2EPbA6HkydPTdxCWD,0.33,2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD
1,D2kBQDRftmygv5f4L,2rFoGTfRa9LFdpQNA,3si95Pn6NjXTxCWcT,PWufwHDsbRaw4se4X,2EPbA6HkydPTdxCWD,1.0,2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD
2,R588B5nqLhmLbC4iW,2rFoGTfRa9LFdpQNA,3si95Pn6NjXTxCWcT,f8wwqTXaifkxxoAc2,2EPbA6HkydPTdxCWD,0.0,2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD
3,poiynLy2tnCMNzdGf,2rFoGTfRa9LFdpQNA,3si95Pn6NjXTxCWcT,iyRaCwz7QzxPRSi5t,2EPbA6HkydPTdxCWD,1.0,2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD
4,KmiSFQicDRa263Nfc,2rFoGTfRa9LFdpQNA,3si95Pn6NjXTxCWcT,kEZXdjhfohiGxJWdu,2EPbA6HkydPTdxCWD,-1.0,2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD


## Run Stable Matching with All Previous Pairs

In [29]:
def create_affinity_matrix(affinities, remap=False): 
    """
    Creates an n^2 affinity matrix.
        
    Input:
        affinities (pandas DataFrame): dataframe with helpeeId, helperId, and value columns.
        remap (boolean): remap values to their equivalent on the interface
    
    Output:
        (list of list of numbers): matrix of affinities. 0 if no affinity between users.
        (dict): dict where keys are numbers and values are userIds mapping matrix index to users.
    """
    # dont modify original dataframe
    affinities = deepcopy(affinities)
    
    # create user superset and user:index mapping
    user_superset = list(set(list(affinities['helperId']) + list(affinities['helpeeId'])))
    user_count = len(user_superset)
    user_index_dict = {user_superset[x]: x for x in range(user_count)}
    
    # create empty n^2 matrix
    affinity_matrix = [[0 for y in range(user_count)] for x in range(user_count)]
    
    # remap data values to UI values
    if remap:        
        value_mappings = {
            '-1.0': 1,
            '0.0':  2,
            '0.33': 3,
            '0.66': 4,
            '1.0':  5
        }
        affinities['value'] = affinities['value'].astype(str)
        affinities.replace({'value': value_mappings}, inplace=True)

    affinities.drop_duplicates(inplace=True)
    
    # loop through data and populate matrix
    for index, row in affinities.iterrows():
        curr_helper_index = user_index_dict[row['helperId']]
        curr_helpee_index = user_index_dict[row['helpeeId']]
        curr_value = row['value']
        
        affinity_matrix[curr_helper_index][curr_helpee_index] = curr_value
    
    # flip user and index in dict
    index_user_dict = {str(v): k for (k, v) in user_index_dict.items()}
        
    return affinity_matrix, index_user_dict


def get_recent_pairings(group_pair_id, limit):
    """
    Retrieve the most recent pairings for a group_id, up to limit, before pairing instance is run
        and return as a dictionary. 
    
    Input:
        group_pair_id (string): group-pairing instance to get recent pairing data for.
        limit (number): number of most recent pairings to get.
        
    Output:
        (dict): dict mapping, bi-directionally, each pairing
    """
    group_id = group_pair_id.split('-')[0]
    
    # get raw pairing data
    relevant_pairings = pairings[(pairings['groupId'] == group_id) & (pairings['group_pair_id'] != group_pair_id)]
    pairing_instance_list = relevant_pairings.sort_values('timestamp', ascending=True)[0:limit]['pairings'].tolist()
    output_list = []
    
    # create dictionaries and add to output
    for pairing_instance in pairing_instance_list:
        pairing_dict = {}
        for pairing in pairing_instance:
            # check if the user is paired with someone
            if 'secondUserId' in pairing:
                # bi-directional representation in dictionary
                pairing_dict[pairing['firstUserId']] = pairing['secondUserId']
                pairing_dict[pairing['secondUserId']] = pairing['firstUserId']
            else:
                pairing_dict[pairing['firstUserId']] = ''
        
        # add to output list
        output_list.append(pairing_dict)
    
    return output_list

def create_weighted_matrix(affinity_matrix, index_user_mapping, recent_pairings):
    """
    Converts an affinity matrix into a weighted matrix.
        Weight is calculated based on previous recent pairings and some random perturbation.
    
    Input:
        affinity_matrix (list of list of numbers): matrix of affinities. 0 if no affinity between users.
        index_user_mapping (dict): dict where keys are numbers and values are userIds mapping matrix index to users.
        recent_pairings (list of dict): up to 3 pairing sessions, ordered by recency, 
            with each dict containing helper-helpee pairs
    
    Output: 
        (list of list of numbers): weighted matrix
    """
    # dont modify original dataframe
    weighted_matrix = deepcopy(affinity_matrix)
    
    # iterate over each element and compute weighted value
    matrix_iterator = range(len(affinity_matrix))
    for row in matrix_iterator:
        for col in matrix_iterator:
            # ignore diagonal
            if row == col:
                continue
            
            # scale weight to be between -100 to 100
            weight = 1 + 99 * affinity_matrix[row][col]
            
            # Penalize recent pairings by increasing weight of pairs that have NOT occurred recently for last 3 pairings
            # ex. If A and B have not paired last time, increase their weight by 80 * 0.5^1
            # ex. If they also didn't pair time before, further increase their weight by 80 * 0.5^2 and so on (up to 3)
            for index, pairing in enumerate(recent_pairings):
                helper = index_user_mapping[str(row)]
                helpee = index_user_mapping[str(col)]
                
                # helper-helpee pairing does not exist in the current pairing
                if helper in pairing and pairing[helper] != helpee:
                    weight += 80 * 0.5 ** (index + 1)
            
            # add some random perturbation, between 0-20, to guarentee strict ordering
            weight += random.random() * 20
            
            # store new edge weight
            weighted_matrix[row][col] = weight # math.floor(weight)
    
    return weighted_matrix

def create_preference_matrix(weighted_matrix):
    """
    Converts an n^2 weighted matrix into a n-by-m preference matrix (where m = n - 1).
    
    Input: 
        weighted_matrix (list of list of numbers): matrix of weighted affinities
    
    Return: 
        (list of list of numbers): preference matrix where each list is ordered list of person indices.
    """
    # create zipped lists of (index, rating)
    preference_matrix = [[(i + 1, value) for i, value in enumerate(x)] for x in weighted_matrix]
    
    # format each row
    for index, curr_person in enumerate(preference_matrix):
        curr_person.sort(key=lambda tup: tup[1], reverse=True)
        
        # add sorted preference list without self
        preference_matrix[index] = [person_rating[0] for person_rating in curr_person if person_rating[0] - 1 != index]
        
    return preference_matrix

def sr_matching_pair_research(group_pair_id, handle_odd_method='remove', remove_all=True):
    """
    Runs stable matching on pair research data, given a group_pair_id to run matching for.
    
    Input: 
        group_pair_id (string): group pairing to run matching on
        handle_odd_method (string): handling odd cases by either adding ('add') or removing ('remove') user
        remove_all (boolean): whether to try again if randomly removing a person fails
        
    Output:
        (dict): output of matching, along with matching metadata
    """
    # create affinity matrix
    curr_data = deepcopy(affinities_history[affinities_history['group_pairing_id'] == group_pair_id])
    curr_affinity_matrix, curr_index_user_mapping = create_affinity_matrix(curr_data[['helperId', 'helpeeId', 'value']], remap=False)

    # get recent pairings and create weighted matrix
    curr_recent_pairings = get_recent_pairings(group_pair_id, 3)
    curr_weighted_matrix = create_weighted_matrix(curr_affinity_matrix, curr_index_user_mapping, curr_recent_pairings)
    
    # create preference matrix
    curr_pref_matrix = create_preference_matrix(curr_weighted_matrix)
    
    # run stable roommates
    stable_result, debug = sr_matching(curr_pref_matrix, handle_odd_method=handle_odd_method, remove_all=remove_all)
    
    # create metadata about the current affinity and add data to pairing_data
    group_id, pairing_id = group_pair_id.split('-')
    user_count = len(curr_affinity_matrix)
    curr_timestamp = pairs_history[pairs_history.group_pairing_id == group_pair_id].iat[0, pairs_history.columns.get_loc('timestamp')]
    
    # create and return matching data
    matching_data = {
        'group_pair_id': group_pair_id,
        'group_id': group_id,
        'pairing_id': pairing_id,
        'timestamp': curr_timestamp,
        'user_count': user_count,
        'odd_even': 'even' if user_count % 2 == 0 else 'odd',
        'odd_handling': handle_odd_method,
        'stable_result': stable_result,
        'stable_unstable': 'unstable' if stable_result is None else 'stable',
        'stable_printout': debug,
        'affinity_matrix': curr_affinity_matrix,
        'weighted_matrix': curr_weighted_matrix,
        'preference_matrix': curr_pref_matrix
    }
    return matching_data

def sr_matching_pair_research_wrapper(exec_dicts):
    """
    Wrapper for sr_matching_pair_research that allows for changing optional parameters.
    
    Input:
        exec_dicts (list of dicts): contains group_pair_id, handle_odd_method, and remove_all
    
    Output:
        (dict): output of matching, along with matching metadata
    """
    return sr_matching_pair_research(exec_dicts['group_pair_id'],
                                     exec_dicts['handle_odd_method'],
                                     exec_dicts['remove_all'])

def execute_sr_matching(group_pairing_ids, handle_odd_method='remove', remove_all=True, parallel=False):
    """
    Wrapper for computing pair research matchings that calls sr_matching_pair_research_wrapper. 
    
    Input:
        group_pairing_ids (list of string): unique group pairing ids to conduct matching on.
        remove_all (boolean): whether to try again if randomly removing a person fails
        parallel (boolean): run matching in parallel across all group_pairing_ids
        
    Output:
        (DataFrame): matchings computed for pair research data
    """
    pairing_data = []
    exec_dicts = [
        {'group_pair_id': group_pair_id, 'handle_odd_method': handle_odd_method, 'remove_all': remove_all} for group_pair_id in group_pairing_ids
    ] 
    
    # compute pairings
    if parallel:
        pool = mp.Pool(processes=mp.cpu_count())
        pairing_data = pool.map(sr_matching_pair_research_wrapper, exec_dicts)
        pool.close()
        pool.join()
    else:
        pairing_data = [sr_matching_pair_research_wrapper(exec_dict) for exec_dict in exec_dicts]
    
    return pd.DataFrame(pairing_data)

### Remove One User Only

In [30]:
# get all pairing instances
group_pairing_ids = affinities_history.group_pairing_id.unique()

# compute pairings and create DataFrame of results
pairing_data_df = execute_sr_matching(group_pairing_ids, handle_odd_method='remove', remove_all=False, parallel=True)

# print results
stable_count = len(pairing_data_df[pairing_data_df['stable_unstable'] == 'stable'])
unstable_count = len(pairing_data_df[pairing_data_df['stable_unstable'] == 'unstable'])
total = stable_count + unstable_count

print('Stable Count: {}, Unstable Count: {}, Total: {} | Proportion: {:1.2f}%'.format(stable_count, unstable_count, total, round(100 * stable_count / total, 2)))
pairing_data_df.head()

Stable Count: 295, Unstable Count: 60, Total: 355 | Proportion: 83.10%


Unnamed: 0,affinity_matrix,group_id,group_pair_id,odd_even,odd_handling,pairing_id,preference_matrix,stable_printout,stable_result,stable_unstable,timestamp,user_count,weighted_matrix
0,"[[0, 1.0, 1.0, 0, 0, 0, 1.0, 1.0, 1.0, 1.0], [...",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD,even,remove,2EPbA6HkydPTdxCWD,"[[10, 3, 2, 7, 9, 8, 6, 5, 4], [7, 8, 1, 6, 4,...",Stable matching found after Phase 2.,"[9, 5, 4, 6, 2, 1, 3, 8, 7, 0]",stable,2017-09-26 21:33:10.196,10,"[[0, 116.17752180546587, 116.3623358694074, 18..."
1,"[[0, 0], [1.0, 0]]",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-A6d3rQwrRZHEz4qHu,even,remove,A6d3rQwrRZHEz4qHu,"[[2], [1]]",Stable matching found after Phase 1.,"[1, 0]",stable,2017-08-22 17:19:36.847,2,"[[0, 12.809850248980794], [100.63565358963567,..."
2,"[[0, 1.0, 1.0, 0, 0, 0, 1.0, 1.0, 1.0, 1.0], [...",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-JS2qH6wPAxLfjZtJW,even,remove,JS2qH6wPAxLfjZtJW,"[[7, 9, 3, 8, 2, 10, 5, 6, 4], [7, 8, 1, 6, 10...",Failed at Phase 2: could not find an all-or-no...,,unstable,2017-09-26 21:33:04.597,10,"[[0, 110.50021510445333, 115.50058636738238, 1..."
3,"[[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, ...",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-SpiKfuqCoEZRLfDNK,even,remove,SpiKfuqCoEZRLfDNK,"[[8, 6, 2, 7, 5, 4, 3], [6, 7, 1, 4, 3, 5, 8],...",Failed at Phase 2: could not find an all-or-no...,,unstable,2018-01-16 21:42:19.584,8,"[[0, 72.8098502489808, 61.63565358963567, 62.8..."
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1.0, 0, 0, 1...",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-SwhcfsdjNCZcyzx3t,even,remove,SwhcfsdjNCZcyzx3t,"[[4, 2, 3, 9, 6, 8, 5, 7, 10], [7, 4, 1, 9, 5,...",Stable matching found after Phase 1.,"[1, 0, 7, 6, 5, 4, 3, 2, 9, 8]",stable,2017-11-28 21:48:06.568,10,"[[0, 75.72942428328025, 74.53398974845823, 78...."


### Remove another user (and attempt for all users) if Stable Matching isn't Found

In [31]:
# get all pairing instances
group_pairing_ids = affinities_history.group_pairing_id.unique()

# compute pairings and create DataFrame of results
pairing_data_df = execute_sr_matching(group_pairing_ids, handle_odd_method='remove', remove_all=True, parallel=True)

# print results
stable_count = len(pairing_data_df[pairing_data_df['stable_unstable'] == 'stable'])
unstable_count = len(pairing_data_df[pairing_data_df['stable_unstable'] == 'unstable'])
total = stable_count + unstable_count

print('Stable Count: {}, Unstable Count: {}, Total: {} | Proportion: {:1.2f}%'.format(stable_count, unstable_count, total, round(100 * stable_count / total, 2)))
pairing_data_df.head()

Stable Count: 325, Unstable Count: 30, Total: 355 | Proportion: 91.55%


Unnamed: 0,affinity_matrix,group_id,group_pair_id,odd_even,odd_handling,pairing_id,preference_matrix,stable_printout,stable_result,stable_unstable,timestamp,user_count,weighted_matrix
0,"[[0, 1.0, 1.0, 0, 0, 0, 1.0, 1.0, 1.0, 1.0], [...",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD,even,remove,2EPbA6HkydPTdxCWD,"[[7, 9, 3, 2, 10, 8, 5, 6, 4], [6, 7, 8, 1, 4,...",Stable matching found after Phase 2.,"[6, 5, 4, 8, 2, 1, 0, 9, 3, 7]",stable,2017-09-26 21:33:10.196,10,"[[0, 120.14959012598534, 125.02789904996727, 2..."
1,"[[0, 0], [1.0, 0]]",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-A6d3rQwrRZHEz4qHu,even,remove,A6d3rQwrRZHEz4qHu,"[[2], [1]]",Stable matching found after Phase 1.,"[1, 0]",stable,2017-08-22 17:19:36.847,2,"[[0, 12.809850248980794], [100.63565358963567,..."
2,"[[0, 1.0, 1.0, 0, 0, 0, 1.0, 1.0, 1.0, 1.0], [...",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-JS2qH6wPAxLfjZtJW,even,remove,JS2qH6wPAxLfjZtJW,"[[7, 9, 3, 8, 2, 10, 5, 6, 4], [7, 8, 1, 6, 10...",Failed at Phase 2: could not find an all-or-no...,,unstable,2017-09-26 21:33:04.597,10,"[[0, 110.50021510445333, 115.50058636738238, 1..."
3,"[[0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, ...",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-SpiKfuqCoEZRLfDNK,even,remove,SpiKfuqCoEZRLfDNK,"[[8, 6, 2, 7, 5, 4, 3], [6, 7, 1, 4, 3, 5, 8],...",Failed at Phase 2: could not find an all-or-no...,,unstable,2018-01-16 21:42:19.584,8,"[[0, 72.8098502489808, 61.63565358963567, 62.8..."
4,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1.0, 0, 0, 1...",2rFoGTfRa9LFdpQNA,2rFoGTfRa9LFdpQNA-SwhcfsdjNCZcyzx3t,even,remove,SwhcfsdjNCZcyzx3t,"[[4, 2, 3, 9, 6, 8, 5, 7, 10], [7, 4, 1, 9, 5,...",Stable matching found after Phase 1.,"[1, 0, 7, 6, 5, 4, 3, 2, 9, 8]",stable,2017-11-28 21:48:06.568,10,"[[0, 75.72942428328025, 74.53398974845823, 78...."


## Analyzing Instability

### TODO
- why wasnt a person proposed to? --> see this

In [32]:
pairing_data_df[['stable_unstable', 'stable_printout', 'group_pair_id']].groupby(['stable_unstable', 'stable_printout']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,group_pair_id
stable_unstable,stable_printout,Unnamed: 2_level_1
stable,Stable matching found after Phase 1.,237
stable,Stable matching found after Phase 2.,88
unstable,Failed at Phase 1: not everyone was proposed to.,11
unstable,Failed at Phase 2: could not find an all-or-nothing cycle len > 3.,17
unstable,"Failed at Verification after Phase 2: matching computed, but not valid.",2


In [33]:
pairing_data_df[['stable_unstable', 'stable_printout', 'odd_even', 'group_pair_id']].groupby(['stable_unstable', 'stable_printout', 'odd_even']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,group_pair_id
stable_unstable,stable_printout,odd_even,Unnamed: 3_level_1
stable,Stable matching found after Phase 1.,even,103
stable,Stable matching found after Phase 1.,odd,134
stable,Stable matching found after Phase 2.,even,47
stable,Stable matching found after Phase 2.,odd,41
unstable,Failed at Phase 1: not everyone was proposed to.,even,11
unstable,Failed at Phase 2: could not find an all-or-nothing cycle len > 3.,even,17
unstable,"Failed at Verification after Phase 2: matching computed, but not valid.",even,1
unstable,"Failed at Verification after Phase 2: matching computed, but not valid.",odd,1


### Unstable Case 1--Failed at Phase 1: not everyone was proposed to.	

In [34]:
unstable_cases_1 = pairing_data_df[pairing_data_df['stable_printout'] == 'Failed at Phase 1: not everyone was proposed to.']
unstable_cases_1.head()

Unnamed: 0,affinity_matrix,group_id,group_pair_id,odd_even,odd_handling,pairing_id,preference_matrix,stable_printout,stable_result,stable_unstable,timestamp,user_count,weighted_matrix
20,"[[0, 0.66, 0, 0, 0.0, 0.33, 0.33, 0.66, 0, 0.3...",73dAmLAqBfvERHcHv,73dAmLAqBfvERHcHv-h7pRkiDWqGHjtPpnv,even,remove,h7pRkiDWqGHjtPpnv,"[[8, 18, 2, 12, 10, 11, 6, 7, 20, 16, 5, 3, 4,...",Failed at Phase 1: not everyone was proposed to.,,unstable,2017-04-21 21:52:34.770,20,"[[0, 70.99321786781479, 13.040374580999607, 12..."
29,"[[0, 0.33, 0, -1.0, 0.0, 1.0, 0.66, 0, 0.0, 0....",9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-8DSi4QqxYSAdyNMpf,even,remove,8DSi4QqxYSAdyNMpf,"[[6, 7, 10, 11, 12, 2, 5, 3, 9, 8, 4], [9, 1, ...",Failed at Phase 1: not everyone was proposed to.,,unstable,2017-09-25 21:57:27.592,12,"[[0, 38.56783707606952, 3.790758570502878, -95..."
40,"[[0, 0.33, 0, -1.0, 0.0, 1.0, 0.66, 0, 0.0, 0....",9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-FNznjYFehPoa7Mx54,even,remove,FNznjYFehPoa7Mx54,"[[6, 7, 10, 11, 12, 2, 5, 3, 9, 8, 4], [9, 1, ...",Failed at Phase 1: not everyone was proposed to.,,unstable,2017-09-25 21:52:27.898,12,"[[0, 38.56783707606952, 3.790758570502878, -95..."
47,"[[0, 0.33, 0, -1.0, 0.0, 1.0, 0.66, 0, 0.0, 0....",9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-KgXzYH5gk8nC6Wgx6,even,remove,KgXzYH5gk8nC6Wgx6,"[[6, 7, 10, 11, 12, 2, 5, 3, 9, 8, 4], [9, 1, ...",Failed at Phase 1: not everyone was proposed to.,,unstable,2017-09-25 21:50:24.312,12,"[[0, 38.56783707606952, 3.790758570502878, -95..."
48,"[[0, 0.33, 1.0, 0.0, 0.66, 0.33, 1.0, 0.33, 0....",9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-MTpPunzB8EihJxbLe,even,remove,MTpPunzB8EihJxbLe,"[[3, 7, 9, 5, 6, 2, 8, 10, 4], [3, 1, 6, 9, 4,...",Failed at Phase 1: not everyone was proposed to.,,unstable,2018-04-20 14:57:17.306,10,"[[0, 43.0616628892501, 114.36665214408158, 14...."


### Unstable Case 2--Failed at Phase 2: could not find an all-or-nothing cycle len > 3.

In [35]:
unstable_cases_2 = pairing_data_df[pairing_data_df['stable_printout'] == 'Failed at Phase 1: not everyone was proposed to.']
unstable_cases_2.head()

Unnamed: 0,affinity_matrix,group_id,group_pair_id,odd_even,odd_handling,pairing_id,preference_matrix,stable_printout,stable_result,stable_unstable,timestamp,user_count,weighted_matrix
20,"[[0, 0.66, 0, 0, 0.0, 0.33, 0.33, 0.66, 0, 0.3...",73dAmLAqBfvERHcHv,73dAmLAqBfvERHcHv-h7pRkiDWqGHjtPpnv,even,remove,h7pRkiDWqGHjtPpnv,"[[8, 18, 2, 12, 10, 11, 6, 7, 20, 16, 5, 3, 4,...",Failed at Phase 1: not everyone was proposed to.,,unstable,2017-04-21 21:52:34.770,20,"[[0, 70.99321786781479, 13.040374580999607, 12..."
29,"[[0, 0.33, 0, -1.0, 0.0, 1.0, 0.66, 0, 0.0, 0....",9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-8DSi4QqxYSAdyNMpf,even,remove,8DSi4QqxYSAdyNMpf,"[[6, 7, 10, 11, 12, 2, 5, 3, 9, 8, 4], [9, 1, ...",Failed at Phase 1: not everyone was proposed to.,,unstable,2017-09-25 21:57:27.592,12,"[[0, 38.56783707606952, 3.790758570502878, -95..."
40,"[[0, 0.33, 0, -1.0, 0.0, 1.0, 0.66, 0, 0.0, 0....",9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-FNznjYFehPoa7Mx54,even,remove,FNznjYFehPoa7Mx54,"[[6, 7, 10, 11, 12, 2, 5, 3, 9, 8, 4], [9, 1, ...",Failed at Phase 1: not everyone was proposed to.,,unstable,2017-09-25 21:52:27.898,12,"[[0, 38.56783707606952, 3.790758570502878, -95..."
47,"[[0, 0.33, 0, -1.0, 0.0, 1.0, 0.66, 0, 0.0, 0....",9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-KgXzYH5gk8nC6Wgx6,even,remove,KgXzYH5gk8nC6Wgx6,"[[6, 7, 10, 11, 12, 2, 5, 3, 9, 8, 4], [9, 1, ...",Failed at Phase 1: not everyone was proposed to.,,unstable,2017-09-25 21:50:24.312,12,"[[0, 38.56783707606952, 3.790758570502878, -95..."
48,"[[0, 0.33, 1.0, 0.0, 0.66, 0.33, 1.0, 0.33, 0....",9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-MTpPunzB8EihJxbLe,even,remove,MTpPunzB8EihJxbLe,"[[3, 7, 9, 5, 6, 2, 8, 10, 4], [3, 1, 6, 9, 4,...",Failed at Phase 1: not everyone was proposed to.,,unstable,2018-04-20 14:57:17.306,10,"[[0, 43.0616628892501, 114.36665214408158, 14...."


### Unstable Case 3--Failed at Verification after Phase 2: matching computed, but not valid.

In [36]:
instability_cases_3 = pairing_data_df[pairing_data_df['stable_printout'] == 'Failed at Verification after Phase 2: matching computed, but not valid.']
instability_cases_3

Unnamed: 0,affinity_matrix,group_id,group_pair_id,odd_even,odd_handling,pairing_id,preference_matrix,stable_printout,stable_result,stable_unstable,timestamp,user_count,weighted_matrix
12,"[[0, -1.0, -1.0, -1.0, -1.0, 0.66, -1.0, 0.0, ...",3Fhz2DZHy6ofre3Nr,3Fhz2DZHy6ofre3Nr-F73t2pJjaXSadWkv8,odd,remove,F73t2pJjaXSadWkv8,"[[10, 6, 8, 11, 9, 5, 12, 15, 17, 7, 13, 14, 2...",Failed at Verification after Phase 2: matching...,,unstable,2018-03-13 19:45:37.203,17,"[[0, -95.27038665354804, -97.74287080851423, -..."
17,"[[0, 0.66, 0, 0, 0.0, 0.33, 0.33, 0.66, 0.33, ...",73dAmLAqBfvERHcHv,73dAmLAqBfvERHcHv-AvtB6obrYsusFyLSw,even,remove,AvtB6obrYsusFyLSw,"[[8, 2, 18, 12, 9, 6, 7, 10, 20, 14, 3, 4, 11,...",Failed at Verification after Phase 2: matching...,,unstable,2017-04-21 16:25:50.574,20,"[[0, 77.56490125877227, 15.32039225844807, 15...."


# Stability in Maximum Weighted Matching Pairings

## TODO
- see if existing matchings from MWM are currently stable