In [None]:
import os
from random import shuffle
from math import ceil
import pandas as pd
from matching.games import HospitalResident

In [None]:
# Both paths are resolved against the directory the kernel was started in.
BASE_DIR = os.getcwd()
INPUT_FPATH = os.path.join(BASE_DIR, 'data.xlsx')    # workbook loaded with pd.read_excel
OUTPUT_FPATH = os.path.join(BASE_DIR, 'output.csv')  # presumably the results file; not used in the cells shown here

In [None]:
# Load the input workbook. dtype=object asks pandas not to infer column
# dtypes, so values stay as they were stored in the sheet.
df_input = pd.read_excel(io=INPUT_FPATH, dtype=object)

In [None]:
# add rows for JVs
# A "JV" row is created for every student whose significant other is coming
# ('Sig Other Coming' == 'T') and is not a Kellogg student themselves
# ('Sig Other Kellogg Student' != 'T'). The JV row:
#   * gets the student's Netid with a 'JV' suffix and points back to the
#     student through 'ParentNetid';
#   * takes gender / passport country from the 'Significant ...' columns;
#   * copies the student's ten votes unchanged, so both express identical
#     trip preferences.
jv_vote_cols = ['Vote' + str(n) for n in range(1, 11)]
brings_jv = (df_input['Sig Other Coming'] == 'T') & (df_input['Sig Other Kellogg Student'] != 'T')

# Netids already in the frame. Skipping JV ids that exist makes the cell safe
# to re-run: a second execution no longer appends duplicate JV rows.
existing_netids = set(df_input['Netid'].apply(str))

jv_records = []
for _idx, row in df_input.loc[brings_jv].iterrows():
    jv_netid = str(row['Netid']) + 'JV'
    if jv_netid in existing_netids:
        continue
    record = {
        'Netid': jv_netid,
        'ParentNetid': str(row['Netid']),
        'Gender': row['Significant Gender'],
        'Program': 'JV',
        'Passport Country': row['Significant Passport Country'],
    }
    for col in jv_vote_cols:
        record[col] = row[col]
    jv_records.append(record)

if jv_records:
    # One concat for all new rows instead of one per row (which re-copied the
    # whole frame on every iteration). dtype=object matches how the sheet was read.
    df_input = pd.concat(
        [df_input, pd.DataFrame(jv_records, dtype=object)],
        sort=False,
        ignore_index=True,
    )
elif 'ParentNetid' not in df_input.columns:
    # No JV rows at all: still create the column, because the column
    # selection further down expects 'ParentNetid' to exist.
    df_input = df_input.assign(ParentNetid=pd.Series(index=df_input.index, dtype=object))

In [None]:
# groupings for program, nationality, gender
# Each column is reduced to two buckets: the value of interest, and 'OTHER'
# for everything else (including missing values and the 'JV' program).
is_two_year = df_input['Program'].isin(['2YMBA'])
df_input['Program'] = df_input['Program'].where(is_two_year, 'OTHER') # 1YMBA

is_american = df_input['Passport Country'].isin(['UNITED STATES'])
df_input['Passport Country'] = df_input['Passport Country'].where(is_american, 'OTHER')

# Gender is left as-is; the binary recoding below is disabled.
# df_input['Gender'] = df_input['Gender'].apply(lambda x: '1' if x in ['M'] else '0') # 1YMBA

In [None]:
# drop cols
# Keep only the identifiers, the three demographic columns and the ten vote
# columns (Vote1 .. Vote10).
votes = ['Vote{}'.format(rank) for rank in range(1, 11)]
keep_cols = ['Netid', 'ParentNetid', 'Gender', 'Program', 'Passport Country']
keep_cols.extend(votes)
df_input = df_input.loc[:, keep_cols]

In [None]:
# demographic constraint distribution range calculations
margin = 0.15


def _population_share(column, value):
    """Netid count of the `value` group in `column` over the total row count, rounded to 4 d.p.

    Raises KeyError when `value` does not occur in `column`.
    """
    group_sizes = df_input.groupby(column)['Netid'].count()
    return (group_sizes / len(df_input)).round(4)[value]


def _band(center):
    """[low, high] list: `center` minus / plus `margin`, each rounded to 2 d.p."""
    return [round(center - margin, 2), round(center + margin, 2)]


# constraints
# share of rows with Gender == 'F'
dist_female = _population_share('Gender', 'F')
rng_female = _band(dist_female)
print(dist_female)
print(rng_female)

# share of rows in the '2YMBA' program
dist_2y = _population_share('Program', '2YMBA')
rng_2y = _band(dist_2y)
print(dist_2y)
print(rng_2y)

# share of rows with a 'UNITED STATES' passport; the allowed band is a fixed
# wide range rather than one centred on the observed share
dist_amer = _population_share('Passport Country', 'UNITED STATES')
# rng_nationality = [round(dist_amer-0.25,2),round(dist_amer+0.25,2)]
rng_amer = [0.1, 0.9]
print(dist_amer)
print(rng_amer)

In [None]:
# Every distinct trip named in any vote column; blank votes (NaN) are skipped.
trips_unique = {
    trip
    for vote_col in votes
    for trip in df_input[vote_col].tolist()
    if str(trip) != 'nan'
}
# Distinct Netids, converted to strings.
netid_unique = list(map(str, df_input['Netid'].unique()))

In [None]:
trip_cap = 20
# Fewest trips that can seat every Netid when each trip is filled to trip_cap.
trips_needed = ceil(len(netid_unique) / trip_cap)

# Long format: one row per (Netid, vote slot); blank votes drop out in groupby.
votes_long = df_input[['Netid'] + votes].melt(
    id_vars=['Netid'],
    value_vars=votes,
    var_name='Vote',
    value_name='Trip',
)
votes_per_trip = votes_long.groupby('Trip')['Netid'].count().sort_values(ascending=False)

# Keep the most-voted trips: the minimum number needed plus two extra.
trips_final = votes_per_trip.iloc[:trips_needed + 2].index.tolist()
# Every selected trip gets the same capacity.
trip_capacity = dict.fromkeys(trips_final, trip_cap)

In [None]:
# For each trip, the list of Netids that voted for it in any of the vote slots.
voters_by_trip = (
    df_input[['Netid'] + votes]
    .melt(id_vars=['Netid'], value_vars=votes, var_name='Vote', value_name='Trip')
    .groupby('Trip')['Netid']
    .apply(list)
)
# Restrict to the trips that were selected to run.
_trips = voters_by_trip[voters_by_trip.index.isin(trips_final)].to_dict()

In [None]:
# Build each student's ordered preference list (Vote1 first), limited to the
# trips that were selected. Students left with nothing are collected separately.
student_preferences = {}
no_preference_students = []
for _idx, record in df_input.iterrows():
    student_id = str(record['Netid'])
    ranked = []
    for choice in record[votes].values.tolist():
        # skip blank votes and votes for trips that are not running
        if str(choice) == 'nan' or choice not in trips_final:
            continue
        ranked.append(choice)
    if ranked:
        student_preferences[student_id] = ranked
    else:
        no_preference_students.append(student_id)
no_preference_students

In [None]:
# Randomised search for an acceptable student -> trip assignment.
#
# Each attempt gives every trip a freshly shuffled ranking of the students who
# voted for it, solves a HospitalResident game (students are the residents,
# trips the hospitals) and validates the result. The first assignment that
# places every student and passes validation is stored in MATCH.
# No random seed is set in the cells shown here, so runs are not repeatable.
iterations = 10000
min_trip_size = 12            # a trip with fewer assigned students is rejected
enforce_demographics = False  # True applies the rng_female / rng_2y / rng_amer bands

# Reset first so that a MATCH left over from an earlier run of this cell can
# never be mistaken for the outcome of this run; stays None if nothing is found.
MATCH = None

# (Gender, Passport Country, Program) per Netid, built once rather than
# filtering df_input for every student on every attempt. The first row wins if
# a Netid is repeated, as with the previous `.values[0]` lookup.
_demographics = {}
for _idx, _row in df_input.iterrows():
    _demographics.setdefault(
        str(_row['Netid']),
        (_row['Gender'], _row['Passport Country'], _row['Program']),
    )


def _trip_ranking(netids):
    """Return a random ranking of `netids` with each JV directly after its partner.

    The ranking is built in a single pass over the shuffled list, so the list
    is never mutated while it is being iterated: every JV whose partner is in
    the list is emitted immediately after that partner. A JV whose partner is
    not in the list keeps its shuffled position.
    """
    shuffled = [str(n) for n in netids]
    shuffle(shuffled)
    present = set(shuffled)
    ordered = []
    for netid in shuffled:
        if netid.endswith('JV') and netid[:-2] in present:
            continue  # emitted right behind the partner below
        ordered.append(netid)
        if netid + 'JV' in present:
            ordered.append(netid + 'JV')
    return ordered


def _check_match(candidate):
    """Validate a solved matching.

    Returns None when every trip passes, otherwise a (trip_number, reason)
    tuple for the first failing trip (trip_number is 1-based, in the
    iteration order of `candidate`).
    """
    for trip_num, (_trip, assigned) in enumerate(candidate.items(), start=1):
        students = [str(member.name) for member in assigned]
        size = len(students)

        # trip size
        if size < min_trip_size:
            return trip_num, 'less than {} students'.format(min_trip_size)

        # jvs with partners
        roster = set(students)
        for student in students:
            if student.endswith('JV') and student[:-2] not in roster:
                return trip_num, 'jv {} not paired with partner'.format(student)

        # demographic distribution (off by default)
        if enforce_demographics and size:
            female = sum(1 for s in students if _demographics[s][0] == 'F')
            american = sum(1 for s in students if _demographics[s][1] == 'UNITED STATES')
            two_year = sum(1 for s in students if _demographics[s][2] == '2YMBA')
            checks = (
                (female, rng_female, 'failed under gender constraint'),
                (two_year, rng_2y, 'failed under program constraint'),
                (american, rng_amer, 'failed under nationality constraint'),
            )
            for count, (low, high), reason in checks:
                if not low <= round(count / size, 3) <= high:
                    return trip_num, reason
    return None


for i in range(iterations):

    trip_preferences = {trip: _trip_ranking(netids) for trip, netids in _trips.items()}

    game = HospitalResident.create_from_dictionaries(
        student_preferences,
        trip_preferences,
        trip_capacity
    )

    match = game.solve(optimal='resident')

    matched_students = [
        student.name
        for _trip, students in match.items()
        for student in students
    ]

    unmatched_students = set(student_preferences.keys()) - set(matched_students)
    if unmatched_students:
        # somebody was left without a trip; try another shuffle
        continue

    failure = _check_match(match)
    if failure is None:
        print('match found!')
        MATCH = match
        break
    print('match', i + 1, 'trip', failure[0], 'failed:', failure[1])
else:
    # reached only when the loop finished without `break`, i.e. no match found
    print('run out of iterations')