# DinnerMatch
*** By Alexander Munoz ***

In [1]:
import pandas as pd
import numpy as np
from itertools import izip_longest
from itertools import chain

In [2]:
def grouper(iterable, n, fillvalue=None):
    "Collect data into fixed-length chunks or blocks"
    # Taken from itertools recipes:
    # https://docs.python.org/2/library/itertools.html#recipes
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)

In [3]:
data = pd.read_csv('data.csv')
data = data.ix[:,[1,2,6,8,9]]
data.columns = ['name', 'email', 'times', 'friend_name', 'friend_email']
data = data.drop(57) #one student accidentally filled out the form twice, drop one submission
data = data.set_index('name')
print data.shape
#data.head()

(72, 4)


In [9]:
while(True): #loop until successful match with no students leftover
    times_dict = dict() #key is name, value is list of available times
    for i in range(len(data['times'])):
        time_list_buffer = data['times'][i].split('pm, ') #split times
        time_list_buffer[-1] = time_list_buffer[-1][:-2] #remove last 'pm'
        times_dict[data.index.values[i]] = time_list_buffer

    times_options = set() #determine total time options students had to choose from, shuffled
    for person in times_dict:
        times_options = times_options.union(set(times_dict[person]))
    times_options = list(times_options)
    np.random.shuffle(times_options)

    # NOTE: remove time options from Monday, as the dining hall is closed that day
    times_options = [x for x in times_options if x not in \
     ['Monday, 2/27  5:00', 'Monday, 2/27  6:30', 'Monday, 2/27  6:00']]

    result_dict = dict()
    for curr_time in times_options:
        curr_time_availables = []
        for person in times_dict:
            if curr_time in times_dict[person]:
                curr_time_availables.append(person)
        if len(curr_time_availables) == 1:
            pass
        else:
            np.random.shuffle(curr_time_availables)
            if len(curr_time_availables) % 3 == 0 or len(curr_time_availables) % 3 == 2:
                result_dict[curr_time] = list(grouper(curr_time_availables, 3))
            else:
                result_dict[curr_time] = list(grouper(curr_time_availables, 4))
            for p in curr_time_availables:
                times_dict.pop(p) #remove people who have been picked, so they don't get picked twice

    times_to_pop = [] #delete empty times
    for t in result_dict:
        if result_dict[t] == []:
            times_to_pop.append(t)
    for p in times_to_pop:
        result_dict.pop(p)
    
    if not bool(times_dict): #if all students have been sorted, break
        break

#pretty print results
for t in result_dict:
    print t
    print '[',
    for pair in result_dict[t]:
        print '(',
        for name in pair:
            print '_name_', ', ',
        print ')',
    print ']'
    print

Thursday, 3/2  6:00
[ ( _name_ ,  _name_ ,  _name_ ,  ) ]

Saturday, 3/4  5:00
[ ( _name_ ,  _name_ ,  _name_ ,  ) ]

Wednesday, 3/8  5:00
[ ( _name_ ,  _name_ ,  _name_ ,  ) ]

Tuesday, 2/28  6:00
[ ( _name_ ,  _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  _name_ ,  ) ]

Saturday, 3/4  6:00
[ ( _name_ ,  _name_ ,  _name_ ,  _name_ ,  ) ]

Wednesday, 3/8  6:00
[ ( _name_ ,  _name_ ,  _name_ ,  ) ]

Tuesday, 3/7  5:00
[ ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ]

Friday, 3/3  6:30
[ ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _name_ ,  ) ( _name_ ,  _name_ ,  _nam

In [10]:
names = []
for timepoint in result_dict:
    for pairing in result_dict[timepoint]:
        for name in list(pairing):
            if str(name) != 'None':
                names.append(name)
                
emails = [data.ix[n,'email'] for n in names]
emails_out = pd.DataFrame(emails, index=names, columns=['email'])
#emails_out.head()

In [32]:
#save emails
emails_out.to_csv('emails_out.csv', sep=',')