# DinnerMatch
*** By Alexander Munoz ***

In [60]:
import pandas as pd
import numpy as np
from itertools import izip_longest
from itertools import chain

In [3]:
def grouper(iterable, n, fillvalue=None):
    "Collect data into fixed-length chunks or blocks"
    # Taken from itertools recipes:
    # https://docs.python.org/2/library/itertools.html#recipes
    # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx
    args = [iter(iterable)] * n
    return izip_longest(fillvalue=fillvalue, *args)

In [113]:
data = pd.read_csv('data.csv')
data = data.ix[:,[1,2,6,8,9]]
data.columns = ['name', 'email', 'times', 'friend_name', 'friend_email']
data = data.drop(57) #one student accidentally filled out the form twice, drop one submission
data = data.set_index('name')
print data.shape
data.head()

(72, 4)


Unnamed: 0_level_0,email,times,friend_name,friend_email
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Eesha Khare,ekhare@college.harvard.edu,"Wednesday, 3/8 6:00pm, Wednesday, 3/8 6:30pm...",Anne Deng,annedeng@college.harvard.edu
Kamara Swaby,kswaby01@college.harvard.edu,"Monday, 2/27 5:00pm, Monday, 2/27 6:00pm, Mo...",,
Jacob Meyerson,jacobmeyerson@college.harvard.edu,"Friday, 3/3 6:30pm, Tuesday, 3/7 6:30pm, Wed...",,
Pieter weemaes,Pweemaes@college.harvard.edu,"Monday, 2/27 5:00pm, Monday, 2/27 6:00pm, Mo...",,
Russell Pekala,russellpekala@college.harvard.edu,"Monday, 2/27 6:00pm, Monday, 2/27 6:30pm, Tu...",,


In [154]:
times_dict = dict() #key is name, value is list of available times
for i in range(len(data['times'])):
    time_list_buffer = data['times'][i].split('pm, ') #split times
    time_list_buffer[-1] = time_list_buffer[-1][:-2] #remove last 'pm'
    times_dict[data.index.values[i]] = time_list_buffer

In [155]:
times_options = set()
for person in times_dict:
    times_options = times_options.union(set(times_dict[person]))
times_options = list(times_options)
np.random.shuffle(times_options)
print times_options

['Monday, 2/27  5:00', 'Thursday, 3/9  6:30', 'Monday, 2/27  6:00', 'Monday, 3/6  5:00', 'Wednesday, 3/8  6:30', 'Wednesday, 3/1  6:00', 'Thursday, 3/9  6:00', 'Thursday, 3/2  5:00', 'Wednesday, 3/1  5:00', 'Saturday, 3/4  6:00', 'Tuesday, 3/7  6:30', 'Friday, 3/3  6:00', 'Sunday, 3/5  5:00', 'Friday, 3/3  5:00', 'Tuesday, 3/7  6:00', 'Thursday, 3/9  5:00', 'Saturday, 3/4  5:00', 'Wednesday, 3/8  5:00', 'Sunday, 3/5  6:30', 'Tuesday, 2/28  6:00', 'Thursday, 3/2  6:00', 'Monday, 3/6  6:00', 'Monday, 2/27  6:30', 'Tuesday, 2/28  6:30', 'Tuesday, 2/28  5:00', 'Monday, 3/6  6:30', 'Friday, 3/3  6:30', 'Thursday, 3/2  6:30', 'Wednesday, 3/8  6:00', 'Wednesday, 3/1  6:30', 'Tuesday, 3/7  5:00', 'Saturday, 3/4  6:30', 'Sunday, 3/5  6:00']


In [156]:
result_dict = dict()
for curr_time in times_options:
    curr_time_availables = []
    for person in times_dict:
        if curr_time in times_dict[person]:
            curr_time_availables.append(person)
    if len(curr_time_availables) == 1:
        pass
    else:
        np.random.shuffle(curr_time_availables)
        if len(curr_time_availables) % 3 == 0 or len(curr_time_availables) % 3 == 2:
            result_dict[curr_time] = list(grouper(curr_time_availables, 3))
        else:
            result_dict[curr_time] = list(grouper(curr_time_availables, 4))
        for p in curr_time_availables:
            times_dict.pop(p) #remove people who have been picked, so they don't get picked twice

In [157]:
times_to_pop = [] #delete empty times
for t in result_dict:
    if result_dict[t] == []:
        times_to_pop.append(t)
for p in times_to_pop:
    result_dict.pop(p)

#pretty print
for t in result_dict:
    print t
    print result_dict[t]
    print

Saturday, 3/4  5:00
[('Gita Abhiraman', 'Elaine', None)]

Monday, 2/27  5:00
[('Mia Vitale', 'Eric Po', 'Devon Black'), ('Justin Fox', 'Arlesia McGowan', 'Cass Hastie'), ('Francesca Noelette ', 'Malaaz Ghandour', 'Jen Xu'), ('Siqi Liu', 'Cecilia Yao', 'Andrew Mayo'), ('Mark Steinbach', 'Brooke Bourgeois', 'Kamara Swaby'), ('Steph Ferrarie', 'Chris Jury', 'Kathryn Kearney'), ("Camille N'Diaye-Muller", 'Victoria', 'Ying Guo'), ('Katie Smilow ', 'Pieter weemaes', 'Meredith Clark'), ('William Chang', 'Lethu Ntshinga', None)]

Saturday, 3/4  6:00
[('Sarah Wu', 'Jullian Duran', 'Noemi Urquiza'), ('Celeste Mendoza', 'Uju Nwosu', 'Stephanie Guo')]

Monday, 2/27  6:00
[('Russell Pekala', 'Joey Colton', 'Natalie Vega', 'Christian Floyd'), ('Lucas Cassels', 'Tony Lin', 'Walker Maeder-York', None)]

Monday, 3/6  5:00
[('Ruiqi He', 'Belle Lee', 'Michael Ge')]

Sunday, 3/5  6:30
[('Chinaza', 'Nakoa Farrant', None)]

Wednesday, 3/8  6:30
[('noemi valdez', 'Charlotte Anrig', 'Eva DiIanni-Miller'), ('N

In [158]:
#ensure no one leftover
if not bool(times_dict):
    print "No one left over!"
else:
    print "SOME PEOPLE UNGROUPED"
    print times_dict

No one left over!


In [164]:
names = []
for timepoint in result_dict:
    for pairing in result_dict[timepoint]:
        for name in list(pairing):
            if str(name) != 'None':
                names.append(name)
                
emails = [data.ix[n,'email'] for n in names]
email_out = pd.DataFrame(emails, index=names, columns=['email'])
emails_out.head()

Unnamed: 0,email
Gita Abhiraman,gitaabhiraman@college.harvard.edu
Elaine,elainedai@college.harvard.edu
Mia Vitale,miavitale@college.harvard.edu
Eric Po,ericpo@college.harvard.edu
Devon Black,devonblack@college.harvard.edu


In [None]:
#save emails
emails_out.to_csv('emails_out.csv', sep=',')