# Teaming

First, we pick a seed for a completely random assignment:

In [None]:
# Starting random seed. The team-building cell passes it as `random_state`
# when sampling teammates and increments it until every team has a
# non-zero interest overlap.
seed = 833
seed

In [2]:
# Authenticate the current user through the Colab auth helper.
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
# default() returns a pair; only the first element (the credentials) is used.
creds, _ = default()

# Authorized gspread client; later cells use it to open the spreadsheet.
gc = gspread.authorize(creds)

In [None]:
# Read every cell of the first worksheet of the shared spreadsheet.
worksheet = gc.open_by_url('https://docs.google.com/spreadsheets/d/1BSW9jrEKLDIdyjhUhuwKqkiZrRSrMslM5QRFbMQcqYU/edit?usp=sharing').get_worksheet(0)  # URL to the PUBLIC list

rows = worksheet.get_all_values()

# Convert to a DataFrame and render.
import pandas as pd
import numpy as np
students = pd.DataFrame.from_records(rows)

# Skip the first row (presumably the sheet's header) and keep only the
# columns that are named just below.
students = students.iloc[1:, [0, 3, 4, 5, 6, 7, 8, 9, 10]]

# Areas of interest, in the order they appear in the sheet.
final_aoi = ['Finance', 'Healthcare', 'BI', 'Technology', 'Marketing', 'Entertainment']
students.columns = ['name', 'email', 'team'] + final_aoi

# Empty / whitespace-only cells become the integer 0.
students = students.replace(r'^\s*$', 0, regex=True)
# NOTE(review): astype('bool') turns ANY non-empty string into True, including
# a literal 'FALSE'. This is only correct if the 'team' cell is left blank for
# students who are not team leads -- confirm against the sheet.
students['team'] = students['team'].astype('bool')

# Map the sheet's text flags to 0/1. The `0: 0` entry is required because
# blank cells were already replaced by the integer 0 above; without it
# Series.map would turn those blanks into NaN and poison every interest sum.
d = {'TRUE': 1, 'FALSE': 0, 0: 0}

for area in final_aoi:
    students[area] = students[area].map(d)

# One 0/1 vector per student, ordered like final_aoi.
students['interests'] = [np.asarray(flags) for flags in students[final_aoi].values.tolist()]
students = students[['name', 'email', 'team'] + final_aoi + ['interests']]

students.head(6)

In [None]:
# Build the PMs table: the students whose `team` flag is already set.
# Rows are ordered by how many interest areas each PM selected, fewest first,
# so the PMs who are hardest to match pick their teammates first.
PMs = students.loc[students.team].copy()
PMs['num_areas'] = [areas.sum() for areas in PMs.interests]
PMs = PMs.sort_values('num_areas').reset_index(drop=True)
PMs

In [None]:
# Build one team around each PM (team lead). Teammates are drawn from the
# students not yet on a team, preferring those who share the most interest
# areas with the lead; ties are broken by random sampling with `seed`.
# The whole assignment is retried with seed + 1 until every team has at least
# one interest area shared by all of its members.
# NOTE: there is no upper bound on retries -- if no seed produces a non-zero
# overlap for every team, this loop never terminates.
if PMs.empty:
    raise ValueError('PMs is empty: at least one team lead is required')

member_cols = ['name', 'email', 'interests', 'match_score']

# Every team has base_size people including its lead; the last n_larger teams
# (in PMs order) get one extra teammate to absorb the remainder.
base_size, n_larger = divmod(len(students), len(PMs))

while True:
    print(seed)  # will try different seeds until no team has a 0 overlap (seed += 1)

    students_ = students.copy()
    teams = pd.DataFrame()

    for i, lead in PMs.iterrows():
        lead_interest = lead['interests']

        # Candidates: everybody not yet assigned, ranked by the number of
        # interest areas they share with this lead.
        teammates = students_[~students_.team].reset_index(drop=True).copy()
        teammates['match_score'] = [sum(interest * lead_interest) for interest in teammates.interests]
        teammates.sort_values('match_score', ascending=False, inplace=True)
        teammates = teammates.reset_index(drop=True)

        take_n = base_size - 1  # number of team members: take_n or take_n + 1
        if i >= len(PMs) - n_larger:
            take_n += 1

        lead_ = students_[students_['name'] == lead['name']][['name', 'email', 'interests']]

        top_score = max(teammates.match_score)
        top_matches = teammates[teammates.match_score == top_score][member_cols]
        n_top_matches = len(top_matches)
        if n_top_matches >= take_n:
            # Enough best matches: pick take_n of them at random.
            team_tmp = top_matches.sample(n=take_n, random_state=seed)
        else:
            # Keep all best matches and fill up from the next score level.
            second_matches = teammates[teammates.match_score == top_score - 1][member_cols]
            n_missing = take_n - n_top_matches
            if len(second_matches) >= n_missing:
                team_tmp = pd.concat([second_matches.sample(n=n_missing, random_state=seed), top_matches])
            else:
                # Still not enough: just take the take_n highest-ranked candidates.
                team_tmp = teammates[member_cols].head(take_n)

        # Mark the chosen students as assigned so later leads cannot pick them.
        students_.loc[students_['name'].isin(team_tmp['name']), 'team'] = True
        team_tmp = pd.concat([lead_, team_tmp])

        # overlap = number of interest areas selected by EVERY member of the
        # team (column-wise product of the 0/1 vectors, then summed).
        interest_matrix = np.array(list(team_tmp.interests))
        team_tmp['overlap'] = interest_matrix.prod(axis=0).sum()
        team_tmp['team'] = i + 1
        teams = pd.concat([teams, team_tmp])

    teams = teams.reset_index(drop=True)
    # Accept the assignment only if no team has a zero overlap. Checked
    # element-wise rather than with np.prod: the product of one integer per
    # row silently wraps around in int64 (e.g. 64 rows with overlap 2 give
    # 2**64 == 0), which would reject valid assignments.
    if (teams.overlap > 0).all():
        break
    seed = seed + 1

teams

In [None]:
# Write the final team table to a CSV file, omitting the DataFrame index.
teams.to_csv('teams-IS833.csv', index=False)