In [445]:
INPUT_FPATH = os.path.join(os.getcwd(),'data.xlsx')
OUTPUT_FPATH = os.path.join(os.getcwd(),'output.csv')

In [446]:
import os
import random
import numpy as np
import pandas as pd
from matching.games import HospitalResident

In [447]:
# import data
df_input = pd.read_excel(INPUT_FPATH,dtype=object)

In [448]:
# add rows for JVs
for idx,row in df_input.loc[(df_input['Sig Other Coming'] == 'T') & (df_input['Sig Other Kellogg Student'] != 'T')].iterrows():
    insertion = {
        'Netid': [str(row['Netid'])+'JV'],
        'ParentNetid': [str(row['Netid'])],
        'Gender': [row['Significant Gender']],
        'Program': ['JV'],
        'Passport Country': [row['Significant Passport Country']],
        'Vote1': [row['Vote1']],
        'Vote2': [row['Vote2']],
        'Vote3': [row['Vote3']],
        'Vote4': [row['Vote4']],
        'Vote5': [row['Vote5']],
        'Vote6': [row['Vote6']],
        'Vote7': [row['Vote7']],
        'Vote8': [row['Vote8']],
        'Vote9': [row['Vote9']],
        'Vote10': [row['Vote10']],
    }
    
    df_input = pd.concat([df_input,pd.DataFrame.from_dict(insertion,orient='columns')],sort=False,ignore_index=True)

In [449]:
# groupings for program, nationality, gender
df_input['Program'] = df_input['Program'].apply(lambda x: x if x in ['2YMBA'] else 'OTHER') # 1YMBA
df_input['Passport Country'] = df_input['Passport Country'].apply(lambda x: x if x in ['UNITED STATES'] else 'OTHER')
# df_input['Gender'] = df_input['Gender'].apply(lambda x: '1' if x in ['M'] else '0') # 1YMBA

In [None]:
# drop cols
votes = ['Vote'+str(i+1) for i in range(10)]
keep_cols = [
    'Netid','ParentNetid','Gender','Program',
    'Passport Country',
] + votes
df_input = df_input[keep_cols]

In [None]:
# demographic constraint distribution range calculations
margin = 0.1
# constraints
dist_gender = round(df_input.groupby('Gender')['Netid'].count()/len(df_input),4)['F']
rng_gender = [round(dist_gender-margin,2),round(dist_gender+margin,2)]
print(dist_gender)
print(rng_gender)
dist_nationality = round(df_input.groupby('Passport Country')['Netid'].count()/len(df_input),4)['UNITED STATES']
rng_nationality = [round(dist_nationality-margin,2),round(dist_nationality+margin,2)]
print(dist_nationality)
print(rng_nationality)
dist_program = round(df_input.groupby('Program')['Netid'].count()/len(df_input),4)['2YMBA']
rng_program = [round(dist_program-margin,2),round(dist_program+margin,2)]
print(dist_program)
print(rng_program)

In [450]:
trips_unique = [df_input[i].tolist() for i in votes]
trips_unique = [t for trip in trips_unique for t in trip if str(t) != 'nan']
trips_unique = set(trips_unique)

In [451]:
netid_unique = [str(i) for i in df_input['Netid'].unique()]

In [452]:
trip_capacity = {trip:20 for trip in trips_unique}
_trips = pd.melt(df_input[['Netid']+votes],id_vars=['Netid'],value_vars=votes,var_name='Vote',value_name='Trip').groupby('Trip')['Netid'].apply(list).to_dict()

In [453]:
student_preferences = {
    str(row['Netid']) : [i for i in row[votes].values.tolist() if str(i) != 'nan']
    for idx,row in df_input.iterrows()
}

In [499]:
iterations = 10

for i in range(iterations):

    trip_preferences = {}
    for trip,netids in _trips.items():
        _netids = [str(i) for i in netids]
        random.shuffle(_netids)
        # put a JV directly after partner - max chance of assigned together
        for netid in _netids:
            if netid[-2:]=='JV':
                _netids.insert(
                    # in the spot directly after student partner
                    _netids.index(netid[:-2])+1,
                    # insert the jv netid
                    _netids.pop(_netids.index(netid))
                )
        trip_preferences[trip] = _netids

    game = HospitalResident.create_from_dictionaries(
        student_preferences,
        trip_preferences,
        trip_capacity
    )

    matching = game.solve(optimal='resident')

    matched_students = []
    for _, students in matching.items():
        for student in students:
            matched_students.append(student.name)

    unmatched_students = set(student_preferences.keys()) - set(matched_students)

    if len(unmatched_students) == 0:
        try:
            for trip,students in match.items():
                students = [str(i.name) for i in students]
                s = len(students)
                f = 0
                am = 0
                y2 = 0
                # trip size
                try:
                    assert s >= 14
                except:
                    raise Exception('less than 14 students')
                for student in students:
                    # jvs with partners
                    if student[-2:]=='JV':
                        try:
                            assert student[:-2] in students
                        except:
                            raise Exception('jv not paired with partner')
                    # demographic info
                    demo = df_input.loc[df_input['Netid'].apply(str)==student]
                    if demo['Gender'].values[0] == 'F':
                        f+=1
                    if demo['Passport Country'].values[0]=='UNITED STATES':
                        am+=1
                    if demo['Program'].values[0]=='2YMBA':
                        y2+=1
                # gender dist
                try:
                    assert rng_gender[0] <= (f/s) <= rng_gender[1]
                except:
                    raise Exception('failed under gender constraint')
                # nationality dist
                try:
                    assert rng_nationality[0] <= (am/s) <= rng_nationality[1]
                except:
                    raise Exception('failed under nationality constraint')
                # program dist
                try:
                    assert rng_program[0] <= (y2/s) <= rng_program[1]
                except:
                    raise Exception('failed under program constraint')
            print('match found')
            MATCH = match
            break

        except Exception as e:
            # raise
            print('match failed:',e)
            continue

print('run out of iterations')

match failed: failed under nationality constraint
run out of iterations
