# Stable Matching Pair Research
Implementation of [Stable Roomates](http://www.dcs.gla.ac.uk/~pat/jchoco/roommates/papers/Comp_sdarticle.pdf) for [Pair Research](http://pairresearch.io/). 

# Load in Libraries and Stable Roommates Matching Module

In [48]:
%load_ext autoreload
%autoreload 2

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from copy import deepcopy

# load stable roommates code
from stable_roommates import stable_matching_wrapper as sr_matching

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Stable Roommates Matching Test Cases
A variety of test cases from (1) Irving's paper, (2) Wikipedia, (3) external implementations, and (4) any other custom cases.

### Test Case from Irving's Paper

In [2]:
paper_matching_6 = [
    [4, 6, 2, 5, 3],
    [6, 3, 5, 1, 4],
    [4, 5, 1, 6, 2],
    [2, 6, 5, 1, 3],
    [4, 2, 3, 6, 1],
    [5, 1, 4, 2, 3]
]

paper_matching_8 = [
    [2, 5, 4, 6, 7, 8, 3],
    [3, 6, 1, 7, 8, 5, 4],
    [4, 7, 2, 8, 5, 6, 1],
    [1, 8, 3, 5, 6, 7, 2],
    [6, 1, 8, 2, 3, 4, 7],
    [7, 2, 5, 3, 4, 1, 8],
    [8, 3, 6, 4, 1, 2, 5],
    [5, 4, 7, 1, 2, 3, 6]
]

paper_no_matching_4 = [
    [2, 3, 4],
    [3, 1, 4],
    [1, 2, 4],
    [1, 2, 3]
]

paper_no_matching_6 = [
    [2, 6, 4, 3, 5],
    [3, 5, 1, 6, 4],
    [1, 6, 2, 5, 4],
    [5, 2, 3, 6, 1],
    [6, 1, 3, 4, 2],
    [4, 2, 5, 1, 3]
]

In [3]:
sr_matching(paper_matching_6, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '6', '2': '3', '3': '2', '4': '5', '5': '4', '6': '1'}


([6, 3, 2, 5, 4, 1], 'Stable matching found after Phase 2.')

In [4]:
sr_matching(paper_matching_8, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '4', '2': '3', '3': '2', '4': '1', '5': '6', '6': '5', '7': '8', '8': '7'}


([4, 3, 2, 1, 6, 5, 8, 7], 'Stable matching found after Phase 2.')

In [5]:
sr_matching(paper_no_matching_4, debug=True)

Input validation passed.
Stable matching is not possible. Failed at Phase 1: not everyone was proposed to.


(None, 'Failed at Phase 1: not everyone was proposed to.')

In [6]:
sr_matching(paper_no_matching_6, debug=True)

Input validation passed.
Stable matching is not possible. Failed at Phase 2: could not find an all-or-nothing cycle len > 3.


(None, 'Failed at Phase 2: could not find an all-or-nothing cycle len > 3.')

### Test Cases from Wikipedia Article (https://en.wikipedia.org/wiki/Stable_roommates_problem#Algorithm)

In [7]:
wiki_matching_6 = [
    [3, 4, 2, 6, 5],
    [6, 5, 4, 1, 3],
    [2, 4, 5, 1, 6],
    [5, 2, 3, 6, 1],
    [3, 1, 2, 4, 6],
    [5, 1, 3, 4, 2]
]

In [8]:
sr_matching(wiki_matching_6, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '6', '2': '4', '3': '5', '4': '2', '5': '3', '6': '1'}


([6, 4, 5, 2, 3, 1], 'Stable matching found after Phase 2.')

### Test Cases from External Implementation (http://www.dcs.gla.ac.uk/~pat/roommates/distribution/data/) 

In [9]:
external_matching_8 = [
    [2, 5, 4, 6, 7, 8, 3],
    [3, 6, 1, 7, 8, 5, 4],
    [4, 7, 2, 8, 5, 6, 1],
    [1, 8, 3, 5, 6, 7, 2],
    [6, 1, 8, 2, 3, 4, 7],
    [7, 2, 5, 3, 4, 1, 8],
    [8, 3, 6, 4, 1, 2, 5],
    [5, 4, 7, 1, 2, 3, 6]
]

external_matching_10 = [
    [8, 2, 9, 3, 6, 4, 5, 7, 10],
    [4, 3, 8, 9, 5, 1, 10, 6, 7],
    [5, 6, 8, 2, 1, 7, 10, 4, 9],
    [10, 7, 9, 3, 1, 6, 2, 5, 8],
    [7, 4, 10, 8, 2, 6, 3, 1, 9],
    [2, 8, 7, 3, 4, 10, 1, 5, 9],
    [2, 1, 8, 3, 5, 10, 4, 6, 9],
    [10, 4, 2, 5, 6, 7, 1, 3, 9],
    [6, 7, 2, 5, 10, 3, 4, 8, 1],
    [3, 1, 6, 5, 2, 9, 8, 4, 7]
]

external_matching_20 = [
    [13, 12, 20, 17, 11, 6, 8, 2, 3, 14, 4, 16, 5, 10, 18, 19, 9, 15, 7],
    [13, 6, 8, 17, 18, 19, 1, 11, 7, 4, 15, 16, 5, 9, 3, 20, 12, 10, 14],
    [6, 16, 4, 9, 14, 13, 17, 19, 8, 2, 1, 12, 20, 5, 18, 15, 7, 11, 10],
    [11, 7, 8, 2, 17, 3, 15, 6, 19, 10, 9, 5, 1, 16, 13, 20, 18, 14, 12],
    [8, 17, 14, 16, 4, 13, 15, 6, 19, 9, 12, 7, 2, 3, 11, 18, 20, 10, 1],
    [8, 13, 10, 14, 18, 15, 2, 7, 4, 16, 19, 5, 9, 17, 20, 3, 11, 12, 1],
    [13, 1, 4, 9, 19, 18, 11, 14, 10, 2, 17, 6, 15, 16, 5, 3, 12, 8, 20],
    [1, 6, 20, 7, 5, 15, 19, 4, 12, 3, 17, 9, 10, 14, 16, 2, 18, 11, 13],
    [17, 13, 3, 5, 7, 4, 12, 2, 18, 20, 15, 8, 10, 1, 6, 11, 19, 14, 16],
    [9, 4, 16, 14, 18, 17, 15, 11, 20, 13, 3, 12, 2, 1, 19, 7, 5, 8, 6],
    [6, 15, 4, 1, 18, 14, 5, 3, 9, 2, 17, 13, 8, 7, 12, 20, 19, 10, 16],
    [5, 18, 7, 16, 6, 20, 19, 14, 9, 17, 3, 1, 8, 10, 11, 13, 2, 15, 4],
    [3, 10, 7, 18, 14, 15, 1, 6, 12, 4, 8, 19, 16, 17, 5, 20, 9, 11, 2],
    [2, 5, 10, 13, 19, 17, 6, 3, 18, 7, 20, 9, 1, 4, 16, 12, 15, 8, 11],
    [12, 13, 5, 11, 2, 16, 18, 14, 1, 6, 17, 8, 19, 4, 10, 7, 20, 3, 9],
    [1, 7, 6, 5, 14, 18, 12, 17, 20, 11, 15, 10, 2, 13, 3, 8, 19, 9, 4],
    [5, 8, 15, 9, 7, 18, 11, 10, 19, 2, 1, 12, 3, 14, 20, 13, 6, 16, 4],
    [14, 3, 8, 10, 13, 5, 9, 15, 12, 1, 17, 6, 16, 11, 2, 7, 4, 19, 20],
    [9, 15, 20, 12, 18, 1, 11, 5, 3, 2, 13, 14, 10, 7, 6, 16, 8, 17, 4],
    [5, 6, 18, 19, 16, 7, 4, 9, 2, 17, 8, 15, 1, 12, 13, 10, 14, 3, 11]
]

# matching exists if algorithm leaves 7 unmatched
external_matching_7 = [
    [3, 4, 2, 6, 5, 7], 
    [6, 5, 4, 1, 3, 7], 
    [2, 4, 5, 1, 6, 7], 
    [5, 2, 3, 6, 1, 7],
    [3, 1, 2, 4, 6, 7],
    [5, 1, 3, 4, 2, 7],
    [1, 2, 3, 4, 5, 6]
]

In [10]:
sr_matching(external_matching_8, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '4', '2': '3', '3': '2', '4': '1', '5': '6', '6': '5', '7': '8', '8': '7'}


([4, 3, 2, 1, 6, 5, 8, 7], 'Stable matching found after Phase 2.')

In [11]:
sr_matching(external_matching_10, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '7', '2': '8', '3': '6', '4': '9', '5': '10', '6': '3', '7': '1', '8': '2', '9': '4', '10': '5'}


([7, 8, 6, 9, 10, 3, 1, 2, 4, 5], 'Stable matching found after Phase 2.')

In [12]:
sr_matching(external_matching_20, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '8', '2': '4', '3': '9', '4': '2', '5': '17', '6': '14', '7': '13', '8': '1', '9': '3', '10': '16', '11': '15', '12': '18', '13': '7', '14': '6', '15': '11', '16': '10', '17': '5', '18': '12', '19': '20', '20': '19'}


([8, 4, 9, 2, 17, 14, 13, 1, 3, 16, 15, 18, 7, 6, 11, 10, 5, 12, 20, 19],
 'Stable matching found after Phase 2.')

In [13]:
sr_matching(external_matching_7, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '6', '2': '4', '3': '5', '4': '2', '5': '3', '6': '1', '7': '-1'}


([6, 4, 5, 2, 3, 1, -1], 'Stable matching found after Phase 2.')

### Custom Test Cases

In [14]:
# empty matrix
custom_no_matching_empty = []

# one person (no matching should be possible)
custom_no_matching_1 = [[]]

# two people
custom_matching_2 = [[2], [1]]

# three people (odd: should add person and find a matching)
custom_matching_3 = [
    [3, 2],
    [3, 1],
    [1, 2]
]

In [15]:
sr_matching(custom_no_matching_empty, debug=True)

Input validation failed: preference_matrix must have size > 1
Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.


(None,
 'Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.')

In [16]:
sr_matching(custom_no_matching_1, debug=True)

Input validation failed: preference_matrix must have size > 1
Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.


(None,
 'Invalid input. Must be n-by-m (where m = n - 1) list of lists of numbers.')

In [17]:
sr_matching(custom_matching_2, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '2', '2': '1'}


([2, 1], 'Stable matching found after Phase 1.')

In [18]:
sr_matching(custom_matching_3, debug=True)

Input validation passed.
Stable matching found. Returning person : partner dictionary.
{'1': '3', '2': '-1', '3': '1'}


([3, -1, 1], 'Stable matching found after Phase 1.')

# Analysis of Stable Roommates Matching on Pair Research Data
Below, we analyze the impact of using the Stable Roommates algorithm on previous pairings. 

We begin by seeing
1. How frequently can we find stable matchings? 
2. When stable matchings are not possible, for what reason do they fail?

## Fetch Pairing Data from [pairresearch.io](http://pairresearch.io/)

In [19]:
import pandas as pd
from pymongo import MongoClient

import seaborn as sns
%matplotlib inline

In [20]:
uri = 'mongodb://delta:delta@ds011419.mlab.com:11419/pair-research'
dbName = 'pair-research'
client = MongoClient(uri)
db = client[dbName]
db.collection_names()

['affinities',
 'meteor_accounts_loginServiceConfiguration',
 'tasks_history',
 'groups',
 'users',
 'objectlabs-system.admin.collections',
 'pairs_history',
 'tasks',
 'system.indexes',
 'pairings',
 'objectlabs-system',
 'affinities_history']

In [21]:
users = pd.DataFrame(list(db.users.find()))
users.head()

Unnamed: 0,_id,createdAt,emails,groups,profile,services
0,dibWQsjhkpvC52AFp,2016-08-16 15:54:28.489,"[{'address': 'hjlkadfjkl@32897.coj', 'verified...",[],{'fullName': 'hihi'},{'password': {'bcrypt': '$2a$10$dkjBKl9Po3AINK...
1,BPQ7hyoHgghctHPqq,2016-08-29 18:24:50.295,"[{'address': 'egerber@northwestern.edu', 'veri...","[{'groupId': '9mdkMmj4pY8Q2TwqF', 'role': {'_i...",{'fullName': 'Liz Gerber'},{'password': {'bcrypt': '$2a$10$Q9SnAxCEjS1V92...
2,bZEjadPH7KrjM9PfD,2016-11-10 19:19:34.147,"[{'address': 'ampiper@northwestern.edu', 'veri...","[{'groupId': 'qPnf2DHHihugATnxD', 'role': {'_i...",{'fullName': 'ampiper@northwestern.edu'},{'password': {}}
3,8mRni9ixefux6bSz9,2016-12-09 01:55:36.706,"[{'address': 'hscho122@kaist.ac.kr', 'verified...",[],{'fullName': 'hscho122@kaist.ac.kr'},{'password': {}}
4,JXCrPvRJwM5pK4Wk7,2017-01-05 07:05:46.455,"[{'address': 'artydeveloperduck@gmail.com', 'v...",[],"{'fullName': 'Deokseong', 'avatar': 'http://or...",{'password': {'bcrypt': '$2a$10$Obf8jHjBnkqLT4...


In [22]:
groups = pd.DataFrame(list(db.groups.find()))
groups.head()

Unnamed: 0,_id,active,activePairing,creationDate,creatorId,creatorName,description,groupName,members,roles
0,tbyuDjH5WGMJbJE2d,True,,2016-07-11 19:09:30.520,n3acFJs3SiDorYxMS,Kevin Chen,First.,Kevin's Primordial Group,"[{'fullName': 'Kevin Chen', 'userId': 'n3acFJs...","[{'title': 'Rayquaza', '_id': 'z9rCTyfP827Bsv7..."
1,HfsBYcpG9NgicEdZr,True,,2016-08-10 18:54:36.396,vRoD2rA6fmkZLx636,Leesha,blah,test group,"[{'fullName': 'Leesha', 'userId': 'vRoD2rA6fmk...","[{'title': 'Professor', '_id': 'Auw8ZeNpaZkqRc..."
2,ZzJRhB7AEd4AS9BvK,True,,2016-08-16 15:44:46.098,33333333333333333,Demo Admin,A demo pair research group,8j4uj38o5qoxbt9,[],"[{'title': 'Professor', '_id': 'naXttGNRPG6CtY..."
3,PXqoCFnwQyLoxPKgw,True,,2016-10-20 21:22:35.309,33333333333333333,Demo Admin,A demo pair research group,k9fffpcpoxtlz0k9,[],"[{'title': 'Professor', '_id': 'SEtx2m82qF7cKH..."
4,KB5xeD6dnwo3w82dz,True,,2016-10-24 02:04:58.469,33333333333333333,Demo Admin,A demo pair research group,uj9sjlqr3rspp66r,[],"[{'title': 'Professor', '_id': 'GWWQow7F4hfF7j..."


In [23]:
tasks_history = pd.DataFrame(list(db.tasks_history.find()))
tasks_history.head()

Unnamed: 0,_id,groupId,name,pairingId,task,userId
0,MRpqhrhDq4ssfRS5e,4xEwAguXinpkMNDqR,Stella,85rSWzajxBtguuds6,swim lessons,CELY4ynWRe3b4Te9c
1,j2zmfAC3G6kjKpKe7,4xEwAguXinpkMNDqR,Kevin Chen,85rSWzajxBtguuds6,plauying,n3acFJs3SiDorYxMS
2,k4ewZSgDHsvDFkXpX,9mdkMmj4pY8Q2TwqF,Yongsung Kim,nRAQpsPhsQs4zRvTL,i need to send out a short-survey to interviewees,EDEFWcagLwCfXP5Jg
3,RZZWR8pABaJBKYNFu,9mdkMmj4pY8Q2TwqF,Julian Vicens,nRAQpsPhsQs4zRvTL,I would like to talk about different ways to m...,goGr47HDwtfphJ5xK
4,Xr3dvNreiwzq9ixrQ,9mdkMmj4pY8Q2TwqF,Spencer Carlson,nRAQpsPhsQs4zRvTL,Make educated guesses about the quality of my ...,vbsF64nAgoitwrNeB


In [24]:
pairings = pd.DataFrame(list(db.pairings.find()))
print('Pairing count: {}, Unique group count: {}'.format(len(pairings), len(pairings.groupId.unique())))
pairings.sort_values('timestamp', ascending=True).head()

Pairing count: 433, Unique group count: 83


Unnamed: 0,_id,groupId,pairings,timestamp
49,N23iLvjp2GWcsHYd5,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'zBZSGgrZFfW5KH5vj', 'firstUs...",2016-08-05 20:14:57.480
50,MeTK9umMiJDFNLPi4,4HSwt5QhJqvaQ7Edd,"[{'firstUserId': '5rx8iB8kiqorTPybp', 'firstUs...",2016-08-10 18:58:08.699
51,TwryBu8ZuDmAyRFSr,4HSwt5QhJqvaQ7Edd,"[{'firstUserId': '5rx8iB8kiqorTPybp', 'firstUs...",2016-08-10 18:58:32.397
14,9WRzKqNvYct6tL8mm,HfsBYcpG9NgicEdZr,"[{'firstUserId': 'aNdSTecskgeAm2St5', 'firstUs...",2016-08-10 19:03:57.006
1,soiecrpv6CRPTqmkd,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'PavTL8zD9664wvtfB', 'firstUs...",2016-08-29 18:22:48.499


In [25]:
affinities = pd.DataFrame(list(db.affinities.find()))
affinities.head()

Unnamed: 0,_id,groupId,helpeeId,helperId,value
0,e6rjGWDrWE5YKxdbh,NRg4vMMoxEAqTHazP,AX8FFZHzPa8eF8bBE,SFg6T8vhT56EeCkRX,5.0
1,mSnrrMX7y26NSQ7iN,NRg4vMMoxEAqTHazP,SFg6T8vhT56EeCkRX,AX8FFZHzPa8eF8bBE,5.0
2,w72kT4Ez7xYkfE8JF,NRg4vMMoxEAqTHazP,5FjQBco6MXaSFhap4,AX8FFZHzPa8eF8bBE,1.0
3,c5xFCfvPimbBsnsGg,NRg4vMMoxEAqTHazP,hkZoyLhrWetKwep3r,AX8FFZHzPa8eF8bBE,4.0
4,bbTEQ3mvL46mTTskJ,NRg4vMMoxEAqTHazP,AX8FFZHzPa8eF8bBE,5FjQBco6MXaSFhap4,5.0


In [26]:
pairs_history = pd.DataFrame(list(db.pairs_history.find()))
pairs_history.sort_values('timestamp', ascending=False).head()

Unnamed: 0,_id,firstUserId,firstUserName,firstUserRole,groupId,pairingId,secondUserId,secondUserName,secondUserRole,timestamp
1901,qqaEvWnqwHWtCqjs4,XYJc5ag7XK5m8P5cd,Maxine Whitely,Undergraduate Student,sM3z5FkZfsABqcj3g,4u2gDDfdjvSzK9RHa,PqHjaxzy7KijCWRjm,Andrew Finke,Undergraduate Student,2018-04-27 20:30:10.250
1900,Sz6gkNCEWNhZHAqzQ,aNdSTecskgeAm2St5,Leesha,Graduate Student,sM3z5FkZfsABqcj3g,4u2gDDfdjvSzK9RHa,zmwK4tJHtwLw8pLRC,Garrett,Graduate Student,2018-04-27 20:30:10.250
1899,jeM3Tai5JKqkZo6S6,KYm8XgZPdCibkfdh4,Suzy Lee,Undergraduate Student,sM3z5FkZfsABqcj3g,4u2gDDfdjvSzK9RHa,3cb7eEv3gY3xiBCk7,Daniel Zhu,Undergraduate Student,2018-04-27 20:30:10.250
1898,7gXKZREgJb3sfzdSh,9Wcpa2nSnEX8k7kMN,Maggie Lou,Undergraduate Student,sM3z5FkZfsABqcj3g,4u2gDDfdjvSzK9RHa,Ssiqz3bTymGZ6ryd4,Megan Conlon,Undergraduate Student,2018-04-27 20:30:10.250
1897,R3WjMNvbzo2YmMkjb,mdhFQ6PNiAhfP7ce2,Kapil Garg,Undergraduate Student,sM3z5FkZfsABqcj3g,4u2gDDfdjvSzK9RHa,oBkvE34HKoa2MaDme,Sehmon Burnam,Undergraduate Student,2018-04-27 20:30:10.250


In [27]:
affinities_history = pd.DataFrame(list(db.affinities_history.find()))
affinities_history['group_pairing_id'] = affinities_history['groupId'] + '-' + affinities_history['pairingId']
affinities_history.head()

Unnamed: 0,_id,groupId,helpeeId,helperId,pairingId,value,group_pairing_id
0,Ae74k8Yvq3i4Koj3A,4xEwAguXinpkMNDqR,n3acFJs3SiDorYxMS,CELY4ynWRe3b4Te9c,85rSWzajxBtguuds6,0.33,4xEwAguXinpkMNDqR-85rSWzajxBtguuds6
1,6o2XqHLfzwsPfTeEL,4xEwAguXinpkMNDqR,CELY4ynWRe3b4Te9c,n3acFJs3SiDorYxMS,85rSWzajxBtguuds6,1.0,4xEwAguXinpkMNDqR-85rSWzajxBtguuds6
2,Ny2qkvoqSMQuZE865,9mdkMmj4pY8Q2TwqF,goGr47HDwtfphJ5xK,EDEFWcagLwCfXP5Jg,nRAQpsPhsQs4zRvTL,-1.0,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL
3,rMiXTvAQRsHoSbpn3,9mdkMmj4pY8Q2TwqF,vbsF64nAgoitwrNeB,EDEFWcagLwCfXP5Jg,nRAQpsPhsQs4zRvTL,-1.0,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL
4,DfqvpcJEXZ5eKgPDs,9mdkMmj4pY8Q2TwqF,goGr47HDwtfphJ5xK,vbsF64nAgoitwrNeB,nRAQpsPhsQs4zRvTL,-1.0,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL


In [45]:
len(affinities_history.pairingId.unique())

Number of pairings: 355


Unnamed: 0,group_id,group_pair_id,odd_even_count,pairing_id,stable_printout,stable_result,stable_unstable,user_count
0,4xEwAguXinpkMNDqR,4xEwAguXinpkMNDqR-85rSWzajxBtguuds6,even,85rSWzajxBtguuds6,Stable matching found after Phase 1.,"[2, 1]",stable,2
1,9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL,even,nRAQpsPhsQs4zRvTL,Stable matching found after Phase 1.,"[2, 1, 6, 5, 4, 3]",stable,6
2,9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-Q2Qffh22oZwWrRCWt,even,Q2Qffh22oZwWrRCWt,Failed at Phase 1: not everyone was proposed to.,,unstable,8
3,9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-N23iLvjp2GWcsHYd5,odd,N23iLvjp2GWcsHYd5,Stable matching found after Phase 1.,"[2, 1, 7, 5, 4, 9, 3, -1, 6]",stable,9
4,4HSwt5QhJqvaQ7Edd,4HSwt5QhJqvaQ7Edd-MeTK9umMiJDFNLPi4,even,MeTK9umMiJDFNLPi4,Stable matching found after Phase 1.,"[2, 1]",stable,2


## Run Stable Matching with All Previous Pairs

In [51]:
def create_affinity_matrix(affinities): 
    """
    Creates an n^2 affinity matrix.
        
    Input:
        affinities (pandas DataFrame): dataframe with helpeeId, helperId, and value columns.
    
    Output:
        (list of list of numbers): matrix of affinities. 0 if no affinity between users.
    """
    # dont modify original dataframe
    affinities = deepcopy(affinities)
    
    # create user superset and user:index mapping
    user_superset = list(set(list(affinities['helperId']) + list(affinities['helpeeId'])))
    user_count = len(user_superset)
    user_index_dict = {user_superset[x]: x for x in range(user_count)}
    
    # create empty n^2 matrix
    affinity_matrix = [[0 for y in range(user_count)] for x in range(user_count)]
    
    # remap data values to UI values
    value_mappings = {
        '-1.0': 1,
        '0.0': 2,
        '0.33': 3,
        '0.66': 4,
        '1.0': 5
    }
    affinities['value'] = affinities['value'].astype(str)
    affinities.replace({"value": value_mappings}, inplace=True)
    affinities.drop_duplicates(inplace=True)
    
    # loop through data and populate matrix
    for index, row in affinities.iterrows():
        curr_helper_index = user_index_dict[row['helperId']]
        curr_helpee_index = user_index_dict[row['helpeeId']]
        curr_value = row['value']
        
        affinity_matrix[curr_helper_index][curr_helpee_index] = curr_value
        
    return affinity_matrix

def create_preference_matrix(affinity_matrix):
    """
    Converts an n^2 affinity matrix into a n-by-m preference matrix (where m = n - 1).
    
    Input: 
        affinity_matrix (list of list of numbers): matrix of affinities. 0 if no affinity between users.
    
    Return: 
        (list of list of numbers): preference matrix where each list is ordered list of person indices.
    """
    # create zipped lists of (index, rating)
    preference_matrix = [[(i + 1, value) for i, value in enumerate(x)] for x in affinity_matrix]
    
    # format each row
    for index, curr_person in enumerate(preference_matrix):
        curr_person.sort(key=lambda tup: tup[1], reverse=True)
        preference_matrix[index] = [person_rating[0] for person_rating in curr_person if person_rating[0] - 1 != index]
        
    return preference_matrix

In [55]:
unique_pairings = affinities_history.group_pairing_id.unique()
pairing_data = [{} for x in unique_pairings]

# TODO: run this in parallel 
for index, group_pair_id in enumerate(unique_pairings):
    # get data and create affinity + preference matrices
    curr_data = affinities_history[affinities_history['group_pairing_id'] == group_pair_id]
    curr_affinity_matrix = create_affinity_matrix(curr_data[['helperId', 'helpeeId', 'value']])
    curr_pref_matrix = create_preference_matrix(curr_affinity_matrix)
    
    # run stable roommates
    stable_result, debug = sr_matching(curr_pref_matrix)
    
    # create metadata about the current affinity and add data to pairing_data
    # ADD timestamp
    group_id, pairing_id = group_pair_id.split('-')
    user_count = len(curr_affinity_matrix)
    
    data_to_add = {
        'group_pair_id': group_pair_id,
        'group_id': group_id,
        'pairing_id': pairing_id,
        'user_count': user_count,
        'odd_even_count': 'even' if user_count % 2 == 0 else 'odd',
        'stable_result': stable_result,
        'stable_unstable': 'unstable' if stable_result is None else 'stable',
        'stable_printout': debug
    }
    
    pairing_data[index] = data_to_add

pairing_data_df = pd.DataFrame(pairing_data)

In [53]:
print('Number of pairings: {}'.format(len(pairing_data_df)))
pairing_data_df.head()

Number of pairings: 355


Unnamed: 0,group_id,group_pair_id,odd_even_count,pairing_id,stable_printout,stable_result,stable_unstable,user_count
0,4xEwAguXinpkMNDqR,4xEwAguXinpkMNDqR-85rSWzajxBtguuds6,even,85rSWzajxBtguuds6,Stable matching found after Phase 1.,"[2, 1]",stable,2
1,9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-nRAQpsPhsQs4zRvTL,even,nRAQpsPhsQs4zRvTL,Stable matching found after Phase 1.,"[2, 1, 6, 5, 4, 3]",stable,6
2,9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-Q2Qffh22oZwWrRCWt,even,Q2Qffh22oZwWrRCWt,Failed at Phase 1: not everyone was proposed to.,,unstable,8
3,9mdkMmj4pY8Q2TwqF,9mdkMmj4pY8Q2TwqF-N23iLvjp2GWcsHYd5,odd,N23iLvjp2GWcsHYd5,Stable matching found after Phase 1.,"[2, 1, 7, 5, 4, 9, 3, -1, 6]",stable,9
4,4HSwt5QhJqvaQ7Edd,4HSwt5QhJqvaQ7Edd-MeTK9umMiJDFNLPi4,even,MeTK9umMiJDFNLPi4,Stable matching found after Phase 1.,"[2, 1]",stable,2


In [56]:
stable_count = len(pairing_data_df[pairing_data_df['stable_unstable'] == 'stable'])
unstable_count = len(pairing_data_df[pairing_data_df['stable_unstable'] == 'unstable'])
total = stable_count + unstable_count

print('Stable Count: {}, Unstable Count: {}, Total: {} | Proportion: {}%'.format(stable_count, unstable_count, total, round(100 * stable_count / total, 2)))

Stable Count: 261, Unstable Count: 94, Total: 355 | Proportion: 73.52%


In [58]:
pairing_data_df.groupby(['stable_unstable', 'stable_printout']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,group_id,group_pair_id,odd_even_count,pairing_id,stable_result,user_count
stable_unstable,stable_printout,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
stable,Stable matching found after Phase 1.,231,231,231,231,231,231
stable,Stable matching found after Phase 2.,30,30,30,30,30,30
unstable,Failed at Phase 1: not everyone was proposed to.,81,81,81,81,0,81
unstable,Failed at Phase 2: could not find an all-or-nothing cycle len > 3.,10,10,10,10,0,10
unstable,"Failed at Verification after Phase 2: matching computed, but not valid.",3,3,3,3,0,3


In [35]:
# this peculiar: of the failures, most failed at Phase 1. Of those, most are odd. 
pairing_data_df.groupby(['stable_unstable', 'stable_printout', 'odd_even_count']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,group_id,group_pair_id,pairing_id,stable_result,user_count
stable_unstable,stable_printout,odd_even_count,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
stable,Stable matching found after Phase 1.,even,120,120,120,120,120
stable,Stable matching found after Phase 1.,odd,111,111,111,111,111
stable,Stable matching found after Phase 2.,even,22,22,22,22,22
stable,Stable matching found after Phase 2.,odd,8,8,8,8,8
unstable,Failed at Phase 1: not everyone was proposed to.,even,28,28,28,0,28
unstable,Failed at Phase 1: not everyone was proposed to.,odd,53,53,53,0,53
unstable,Failed at Phase 2: could not find an all-or-nothing cycle len > 3.,even,8,8,8,0,8
unstable,Failed at Phase 2: could not find an all-or-nothing cycle len > 3.,odd,2,2,2,0,2
unstable,"Failed at Verification after Phase 2: matching computed, but not valid.",even,3,3,3,0,3


In [36]:
# deeper analysis on odd unstable cases
odd_unstable_df = pairing_data_df[(pairing_data_df['stable_unstable'] == 'unstable') & (pairing_data_df['odd_even_count'] == 'odd')]
odd_unstable_ids = list(odd_unstable_df.group_pair_id.unique())

In [37]:
odd_unstable_df.head()

Unnamed: 0,group_id,group_pair_id,odd_even_count,pairing_id,stable_printout,stable_result,stable_unstable,user_count
11,kY7xHo6c5m5tCiQMH,kY7xHo6c5m5tCiQMH-6iDP6bNp3pxLgXjJE,odd,6iDP6bNp3pxLgXjJE,Failed at Phase 1: not everyone was proposed to.,,unstable,5
12,kY7xHo6c5m5tCiQMH,kY7xHo6c5m5tCiQMH-LYkNgh3bPYZSsxBYX,odd,LYkNgh3bPYZSsxBYX,Failed at Phase 1: not everyone was proposed to.,,unstable,5
13,kY7xHo6c5m5tCiQMH,kY7xHo6c5m5tCiQMH-RoiXFxBczLFsL3p8T,odd,RoiXFxBczLFsL3p8T,Failed at Phase 1: not everyone was proposed to.,,unstable,5
14,kY7xHo6c5m5tCiQMH,kY7xHo6c5m5tCiQMH-GkHWhof4i9aaWmueG,odd,GkHWhof4i9aaWmueG,Failed at Phase 1: not everyone was proposed to.,,unstable,5
15,kY7xHo6c5m5tCiQMH,kY7xHo6c5m5tCiQMH-6gKDsYAEWdGWEuoxj,odd,6gKDsYAEWdGWEuoxj,Failed at Phase 1: not everyone was proposed to.,,unstable,5


In [40]:
curr_affinity_matrix

[[0, 5, 3, 3, 1, 3, 3, 4, 1],
 [5, 0, 4, 3, 5, 4, 4, 5, 3],
 [5, 5, 0, 4, 5, 3, 5, 4, 1],
 [1, 4, 5, 0, 5, 4, 2, 5, 5],
 [3, 2, 2, 3, 0, 2, 1, 2, 2],
 [5, 5, 2, 1, 3, 0, 2, 2, 4],
 [4, 4, 4, 1, 3, 3, 0, 4, 4],
 [3, 3, 3, 4, 3, 3, 4, 0, 2],
 [1, 2, 1, 3, 2, 4, 2, 2, 0]]

In [41]:
curr_pref_matrix

[[2, 8, 3, 4, 6, 7, 5, 9],
 [1, 5, 8, 3, 6, 7, 4, 9],
 [1, 2, 5, 7, 4, 8, 6, 9],
 [3, 5, 8, 9, 2, 6, 7, 1],
 [1, 4, 2, 3, 6, 8, 9, 7],
 [1, 2, 9, 5, 3, 7, 8, 4],
 [1, 2, 3, 8, 9, 5, 6, 4],
 [4, 7, 1, 2, 3, 5, 6, 9],
 [6, 4, 2, 5, 7, 8, 1, 3]]

In [42]:
# TODO
# see if existing matchings from MWM are currently stable
# try to pair odd people by randomly ejecting one person, rather than filling