# Code for Matching Simulation

### Matching Algorithms

In [None]:
# Random
def alg_random(giver, takers, g2t, t2g):
    """Recommend ``n_choice`` takers drawn at random, without repeats.

    ``giver``, ``g2t`` and ``t2g`` are not used; they are accepted so this
    function has the same call signature as the other per-giver algorithms.
    ``n_choice`` is read from module scope.

    Returns:
        The list produced by ``random.sample(takers, n_choice)``.
    """
    return random.sample(takers, n_choice)

# CF: Collaborative Filtering
def alg_CF(givers, takers, scores):
    """Build collaborative-filtering recommendations for all givers at once.

    An SVD model is fitted on the rows of ``scores`` whose giver is in
    ``givers`` and whose taker is in ``takers``.  Every giver then receives
    the ``n_choice`` (module-level) takers with the highest estimated rating.

    Returns:
        dict mapping each giver id to a list of recommended taker ids.
    """
    in_sample = scores.giver.isin(givers) & scores.taker.isin(takers)
    ratings = scores[in_sample]

    full_train = Dataset.load_from_df(ratings, Reader(rating_scale=(1, 5))).build_full_trainset()
    svd = SVD(n_factors=100, random_state=0)
    svd.fit(full_train)

    def _best_takers(giver):
        # Candidate pool: takers appearing in a rating row of any OTHER giver.
        # NOTE(review): this does not remove takers the giver has already
        # rated -- confirm that is the intended pool.
        pool = ratings.loc[ratings['giver'] != giver, 'taker'].unique()
        ranked = sorted(
            (svd.predict(giver, taker) for taker in pool),
            key=lambda pred: pred.est,
            reverse=True,
        )
        return [pred.iid for pred in ranked[:n_choice]]

    return {giver: _best_takers(giver) for giver in givers}

# EM: Efficiency-Maximizing
def alg_EM(giver, takers, g2t, t2g):
    """Recommend the ``n_choice`` takers this giver is predicted to rate highest.

    Only ``giver`` and ``g2t`` are used; ``takers`` and ``t2g`` exist for
    signature parity with the other algorithms.  ``n_choice`` is read from
    module scope.

    Returns:
        A pandas Series of taker ids, best predicted rating first.
    """
    own_rows = g2t[g2t.giver == giver]
    ranked = own_rows.sort_values('y_pred', ascending=False)
    return ranked.taker[:n_choice]

# FA: Fairness-Aware
def _alg_FA(giver, takers, g2t, superstar_frac):
    """Shared body of the FA@k algorithms.

    Ranks this giver's rows of ``g2t`` by ``y_pred`` (descending), skips the
    first ``round(len(takers) * superstar_frac)`` entries and returns the
    next ``n_choice`` (module-level) taker ids as a pandas Series.
    """
    superstar = round(len(takers) * superstar_frac)
    ranked = g2t[g2t.giver == giver].sort_values('y_pred', ascending=False)
    return ranked.taker[superstar:superstar + n_choice]


# The public wrappers stay as separate ``def``s: callers derive the algorithm
# label from ``__name__`` (e.g. 'alg_FA1' -> 'FA1').

# FA@1
def alg_FA1(giver, takers, g2t, t2g):
    """Fairness-aware recommendation skipping the top 1% of ranked takers."""
    return _alg_FA(giver, takers, g2t, 0.01)


# FA@2
def alg_FA2(giver, takers, g2t, t2g):
    """Fairness-aware recommendation skipping the top 2% of ranked takers."""
    return _alg_FA(giver, takers, g2t, 0.02)


# FA@5
def alg_FA5(giver, takers, g2t, t2g):
    """Fairness-aware recommendation skipping the top 5% of ranked takers."""
    return _alg_FA(giver, takers, g2t, 0.05)


# FA@10
def alg_FA10(giver, takers, g2t, t2g):
    """Fairness-aware recommendation skipping the top 10% of ranked takers."""
    return _alg_FA(giver, takers, g2t, 0.10)

### Data Preparation

In [None]:
def data_prep(df_giver, df_taker, exp_dir, n_lead=81, emb_dim=512):
    """Cross-join giver and taker profiles into one row per (giver, taker) pair.

    The joined columns are reordered to
    ``[id_x, id_y, giver leading cols, taker leading cols,
    giver last-emb_dim cols, taker last-emb_dim cols]``.

    Args:
        df_giver: Giver profiles; must contain an ``id`` column.
        df_taker: Taker profiles; must contain an ``id`` column.
        exp_dir: Unused; kept so existing callers keep working.
        n_lead: Number of leading columns of the joined frame that stay in
            place (both id columns plus the giver's leading columns).
            Defaults to the previously hard-coded 81.
        emb_dim: Width of the trailing block of each profile (presumably the
            facial embedding -- verify against the data).  Defaults to 512.

    Returns:
        The reordered cross join with a fresh ``RangeIndex``.

    Side effects:
        A constant ``key`` column is written onto BOTH input frames in place.
        This is kept deliberately: the simulation loop in this file drops
        ``key`` from its samples after each call.
    """
    # A constant join key turns the merge into a cartesian product.
    df_giver['key'] = 1
    df_taker['key'] = 1

    cross_joined = pd.merge(df_giver, df_taker, on='key').drop('key', axis=1)
    # Move the taker id next to the giver id so the two ids lead the frame.
    cross_joined.insert(1, 'id_y', cross_joined.pop('id_y'))

    cols = cross_joined.columns.tolist()
    giver_emb_end = n_lead + emb_dim
    ordered = (
        cols[:n_lead]                    # ids + giver leading columns
        + cols[giver_emb_end:-emb_dim]   # taker leading columns
        + cols[n_lead:giver_emb_end]     # giver trailing block
        + cols[-emb_dim:]                # taker trailing block
    )
    return cross_joined.loc[:, ordered].reset_index(drop=True)

### Rating Inference with Prediction Model

In [None]:
def rating_inference(g2t_sample, exp_dir):
    """Predict a rating for every row of ``g2t_sample`` with a saved ComboNet.

    Args:
        g2t_sample: Frame whose first two columns identify the pair (giver,
            taker) and whose remaining columns are the model inputs.
        exp_dir: Name of the saved model; weights are loaded from
            ``trained_models/<exp_dir>.pth``.

    Returns:
        DataFrame with columns ``giver``, ``taker``, ``y_pred``, sharing the
        index of ``g2t_sample``.

    Side effects:
        Rebinds the module-level ``x_train``, ``y_train``, ``x_test``,
        ``y_test`` and sets ``args.exp_dir`` / ``args.in_dim``.
    """
    # Data
    # ``load_dataset`` only receives ``args``, so it presumably picks the data
    # up from these globals -- verify against its definition.  Column 1 (the
    # taker id) is used as the label column; NOTE(review): looks like a
    # placeholder since only predictions are consumed -- confirm.
    global x_train, y_train, x_test, y_test
    x_train = g2t_sample.iloc[:, 2:]
    y_train = g2t_sample.iloc[:, 1]
    x_test = g2t_sample.iloc[:, 2:]
    y_test = g2t_sample.iloc[:, 1]
    dataloaders = load_dataset(args)

    # Model Define
    args.exp_dir = exp_dir
    args.in_dim = x_test.shape[1]

    model = ComboNet(args)
    model_dir = 'trained_models'
    model = model.float()
    # Wrapped before loading, so the checkpoint keys must match the wrapped
    # model's key names.
    model = nn.DataParallel(model)

    # Model Load
    loaded_state_dict = torch.load(path.join(model_dir, args.exp_dir+'.pth'))
    model.load_state_dict(loaded_state_dict)

    model.eval()
    model = model.to(device)

    # Rating values of the five classes as a (5, 1) column; built once
    # instead of once per batch.
    class_values = torch.from_numpy(
        np.array([[1.0, 2.0, 3.0, 4.0, 5.0]], dtype=float).T
    ).to(device).float()

    # Rating Inference
    y_pred = []

    with torch.no_grad():
        for data in dataloaders['test']:
            profiles = data['profile'].to(device)

            regression_output, classification_output = model(profiles)
            probs = F.softmax(classification_output, dim=1)
            # Expected rating under the predicted class distribution.
            expectation = torch.matmul(probs, class_values).view(-1, 1)

            # Blend: regression head weighted 2, classifier expectation 1.
            output = (2 * regression_output + expectation) / 3
            y_pred += output.to("cpu").detach().numpy().tolist()

    # Explicit copy: adding a column to a bare ``iloc`` slice is the
    # chained-assignment pattern pandas warns about.
    pred_df = g2t_sample.iloc[:, :2].copy()
    pred_df.columns = ['giver', 'taker']
    pred_df['y_pred'] = [n[0] for n in y_pred]

    return pred_df

### Functions for Simulation without Facial Editing

In [None]:
def simulation(givers, takers, algorithm, g2t, t2g):
    """Run one recommendation round and decide likes and matches.

    For every giver the chosen ``algorithm`` yields candidate takers.  Each
    (giver, taker) pair is scored in both directions from ``g2t`` / ``t2g``;
    a direction counts as a like when the rounded score is strictly greater
    than the module-level ``like_threshold``.  Mutual likes become matches,
    limited to ``match_max`` per taker, keeping the highest ``t2g_score``.

    Returns:
        DataFrame with columns giver, taker, g2t_score, g2t_like, t2g_score,
        t2g_like, match.
    """
    # CF recommends for all givers in one shot; the others work per giver.
    uses_cf = (algorithm == alg_CF)
    if uses_cf:
        cf_cand_dict = alg_CF(givers, takers, scores)

    rows = []
    for giver in givers:
        candidates = cf_cand_dict[giver] if uses_cf else algorithm(giver, takers, g2t, t2g)

        for taker in candidates:
            # ``.item()`` requires exactly one prediction row per ordered pair.
            g2t_score = g2t[(g2t.giver == giver) & (g2t.taker == taker)].y_pred.item()
            t2g_score = t2g[(t2g.giver == taker) & (t2g.taker == giver)].y_pred.item()
            rows.append([
                int(giver),
                int(taker),
                g2t_score,
                round(g2t_score) > like_threshold,
                t2g_score,
                round(t2g_score) > like_threshold,
            ])

    rec_df = pd.DataFrame(
        rows,
        columns=['giver', 'taker', 'g2t_score', 'g2t_like', 't2g_score', 't2g_like'],
    )
    rec_df['match'] = False

    mutual = rec_df[rec_df['g2t_like'].eq(True) & rec_df['t2g_like'].eq(True)]
    ordered = mutual.sort_values(by=['taker', 't2g_score'], ascending=[True, False])
    kept = ordered.groupby('taker').head(match_max)
    rec_df.loc[kept.index, 'match'] = True

    return rec_df


def gini_coefficient(counts):
    """Return the Gini coefficient of ``counts``.

    Computes ``sum_{i,j} |x_i - x_j| / (2 * n * sum(x))`` using the
    equivalent sorted form ``sum_i (2i - n - 1) * x_(i) / (n * sum(x))``,
    which is O(n log n) instead of the O(n^2) pairwise loop.

    Args:
        counts: Iterable of non-negative numbers (list, pandas Series, ...).

    Returns:
        The Gini coefficient; ``0.0`` when ``counts`` is empty or sums to
        zero (no mass to distribute, so no inequality).
    """
    values = sorted(counts)
    n = len(values)
    total = sum(values)
    if n == 0 or total == 0:
        return 0.0
    # With ascending values, the i-th smallest (1-based) contributes with
    # weight (2i - n - 1) to half of the pairwise absolute-difference sum.
    weighted = sum((2 * rank - n - 1) * value for rank, value in enumerate(values, start=1))
    return weighted / (n * total)


def sim_analysis(rec_df, gender):
    """Summarise one simulation round: efficiency and fairness figures.

    Args:
        rec_df: Recommendation log with ``g2t_like``, ``match`` and ``taker``
            columns.
        gender: ``'m2f'`` ranks takers with the module-level ``f_rank``;
            anything else uses ``m_rank``.

    Returns:
        Tuple ``(like, match, high_rate, low_rate, gini_coef)`` where
        ``high_rate`` / ``low_rate`` are the shares of recommendations going
        to takers whose ``score`` is at or below / above the median, and
        ``gini_coef`` is the Gini coefficient of per-taker recommendation
        counts.
    """
    # Efficiency: number of liked recommendations and number of matches.
    like = len(rec_df.loc[rec_df['g2t_like']])
    match = len(rec_df.loc[rec_df['match']])

    # Fairness: how recommendations are spread over the ranked takers.
    cand_cnt = Counter(rec_df['taker'].values.tolist())
    rank_df = pd.DataFrame(f_rank if gender == 'm2f' else m_rank)
    rank_df['count'] = rank_df.index.map(cand_cnt).fillna(0)

    gini_coef = gini_coefficient(rank_df['count'])

    mid_rank = rank_df.score.median()
    total_recs = sum(cand_cnt.values())
    at_or_below_median = rank_df[rank_df.score <= mid_rank]['count'].sum()
    above_median = rank_df[rank_df.score > mid_rank]['count'].sum()
    high_rate = at_or_below_median / total_recs
    low_rate = above_median / total_recs

    return like, match, high_rate, low_rate, gini_coef


def lnm_count(givers, takers, g2t, t2g, gender):
    """Run every matching algorithm once and collect summaries and logs.

    Args:
        givers: Ids of the users receiving recommendations.
        takers: Ids of the users being recommended.
        g2t: Predicted giver->taker ratings (giver, taker, y_pred).
        t2g: Predicted taker->giver ratings (giver, taker, y_pred).
        gender: ``'m2f'`` or anything else (treated as the reverse
            direction); selects which of the module-level ``m_rank`` /
            ``f_rank`` applies to givers and takers.

    Returns:
        dict with ``'result_df'`` (one summary row per algorithm: algorithm,
        like, match, high_rate, low_rate, gini) and ``'rec_df'`` (all
        per-recommendation logs concatenated, with ``g_rank``, ``t_rank`` and
        ``alg_type`` columns added).
    """
    lnm_df = pd.DataFrame(columns=['algorithm', 'like', 'match', 'high_rate', 'low_rate', 'gini'])
    alg_list = [alg_random, alg_CF, alg_EM, alg_FA1, alg_FA2, alg_FA5, alg_FA10]

    # The rank tables depend only on the direction, so pick them once.
    # (A previously computed per-call ``score_mean`` groupby was never used
    # here and has been removed.)
    if gender == 'm2f':
        giver_rank, taker_rank = m_rank, f_rank
    else:
        giver_rank, taker_rank = f_rank, m_rank

    rec_df_list = []
    for i, algorithm in enumerate(alg_list):
        rec_df = simulation(givers, takers, algorithm, g2t, t2g)
        like, match, high_rate, low_rate, gini_coef = sim_analysis(rec_df, gender)

        rec_df['g_rank'] = rec_df['giver'].map(giver_rank)
        rec_df['t_rank'] = rec_df['taker'].map(taker_rank)

        # 'alg_FA1' -> 'FA1'
        alg_name = algorithm.__name__.split('_')[1]
        lnm_df.loc[i] = [alg_name, like, match, high_rate, low_rate, gini_coef]
        rec_df['alg_type'] = alg_name
        rec_df_list.append(rec_df)

    log_df = pd.concat(rec_df_list, ignore_index=True)

    return {'result_df': lnm_df, 'rec_df': log_df}

### SHAP Explainer

In [None]:
def shap_explainer(model, dataset, args=args):
    """Compute SHAP values of ``model.reg_model`` for every row of ``dataset``.

    The ``score`` column is dropped and the remaining columns are used both
    as the explainer's background data and as the samples to explain.
    ``args`` is accepted but not used.

    Returns:
        Whatever ``shap.DeepExplainer.shap_values`` returns for those samples.
    """
    features = dataset.drop(['score'], axis=1)
    samples = torch.from_numpy(features.values).to(device).float()

    deep_explainer = shap.DeepExplainer(model.reg_model, samples)
    return deep_explainer.shap_values(samples)

In [None]:
def explore_direction(gender, ids, dataset, args=args):
    """Derive per-user and group-level editing directions from SHAP values.

    For every giver in ``ids`` the SHAP values of their rows in ``dataset``
    are combined with the mean-centred features; the ``n_edit``
    (module-level) strongest features that lie in the facial block
    (feature index >= 670) receive a direction of +1 / -1.

    Args:
        gender: ``'m2f'`` loads ``F3_M.pth``; anything else loads ``F3_F.pth``.
        ids: Giver ids to compute directions for.
        dataset: Frame with ``giver``, ``taker``, ``score`` and feature columns.
        args: Configuration passed to ``ComboNet`` and ``shap_explainer``.

    Returns:
        ``(shap_avg, shap_ind)``: two float DataFrames indexed by ``ids`` with
        columns ``0..511``.  ``shap_ind`` holds each giver's own direction;
        ``shap_avg`` holds the same group direction in every row (sign of the
        column sums of ``shap_ind`` on the ``n_edit`` strongest columns).
    """
    import os

    n_face = 512        # width of the facial block
    face_offset = 670   # index of the first facial feature among model inputs

    # Loading Prediction Model
    model = ComboNet(args)
    weight_file = 'F3_M.pth' if gender == 'm2f' else 'F3_F.pth'
    # os.path.join instead of a hard-coded '\\' so the path also resolves
    # outside Windows.
    loaded_state_dict = torch.load(os.path.join('trained_models', weight_file))
    # Drop the 'module.' key prefix so the keys fit the bare model built here.
    new_state_dict = OrderedDict(
        (name.replace('module.', ''), tensor) for name, tensor in loaded_state_dict.items()
    )
    model.load_state_dict(new_state_dict)
    model.eval()
    model = model.to(device)

    # Individual Direction
    # Accumulate in a plain array: rows yielded by DataFrame.iterrows() may be
    # copies, so writes to them are not guaranteed to reach the frame.
    ind = np.zeros((len(ids), n_face))
    for pos, giver in enumerate(ids):
        data = dataset[dataset.giver == giver].drop(['giver', 'taker'], axis=1)
        shap_val = shap_explainer(model, data, args)
        x = data.drop(['score'], axis=1)
        mean_adjusted = x - x.mean(axis=0)
        # Per-feature sum over samples of (centred value * SHAP value).
        arr = np.asarray(np.einsum('ij,ij->j', mean_adjusted.to_numpy(), shap_val))
        by_strength = np.argsort(np.abs(arr))[::-1]
        # Keep the n_edit strongest features that are facial features.
        facial = by_strength[by_strength >= face_offset][:n_edit]
        ind[pos, facial - face_offset] += np.sign(arr[facial])
    shap_ind = pd.DataFrame(ind, index=ids, columns=range(n_face))

    # Group Direction
    # Uses the local per-user table (not a module-level one), so the result
    # depends only on this call's inputs.
    col_sum = shap_ind.sum()
    top = col_sum.abs().sort_values(ascending=False).index[:n_edit].to_numpy()
    avg = np.zeros((len(ids), n_face))
    # Column labels are 0..n_face-1, so labels double as positions.
    avg[:, top] = np.sign(col_sum[top].to_numpy())
    shap_avg = pd.DataFrame(avg, index=ids, columns=range(n_face))

    return shap_avg, shap_ind

### Function for Simulation with Facial Editing

In [None]:
def edit_simulation(gender, edit_direction, edit_strength=1, edit_both=True):
    """Re-score the un-edited recommendations after facial editing.

    The (giver, taker) pairs logged in the module-level ``m2f_og`` /
    ``f2m_og`` are kept; only the scores are recomputed after shifting the
    rated user's last 512 feature columns along the rater's
    ``edit_direction`` row, scaled by ``edit_strength * emb_std``.

    Args:
        gender: ``'m2f'`` or anything else (treated as the reverse direction).
        edit_direction: Frame indexed by user id with 512 direction columns.
        edit_strength: Multiplier applied to the per-column ``emb_std``.
        edit_both: When true the taker->giver scores are re-predicted as
            well; otherwise the logged ``t2g_score`` / ``t2g_like`` are kept.

    Returns:
        dict with ``'result_df'`` (one summary row per algorithm) and
        ``'rec_df'`` (the re-scored recommendation log).
    """
    emb_dim = 512

    if gender == 'm2f':
        rec_df = m2f_og['rec_df'].copy()
        giver_x = m_sample.set_index('id')
        taker_x = f_sample.set_index('id')
        gt_model = 'F3_M'
        tg_model = 'F3_F'
    else:
        rec_df = f2m_og['rec_df'].copy()
        giver_x = f_sample.set_index('id')
        taker_x = m_sample.set_index('id')
        gt_model = 'F3_F'
        tg_model = 'F3_M'

    def _edited_features(rater_col, target_col, rater_x, target_x):
        """Assemble model inputs for rater->target and edit the target's face."""
        feats = rec_df[[rater_col, target_col]]
        # Non-visual features first (rater, then target) ...
        feats = pd.merge(left=feats, right=rater_x.iloc[:, :-emb_dim], how='left',
                         left_on=rater_col, right_index=True)
        feats = pd.merge(left=feats, right=target_x.iloc[:, :-emb_dim], how='left',
                         left_on=target_col, right_index=True)
        # ... then the GAN-enabled facial features in the same order.
        feats = pd.merge(left=feats, right=rater_x.iloc[:, -emb_dim:], how='left',
                         left_on=rater_col, right_index=True)
        feats = pd.merge(left=feats, right=target_x.iloc[:, -emb_dim:], how='left',
                         left_on=target_col, right_index=True)
        feats = feats.reset_index(drop=True)
        # The last emb_dim columns are the target's face; move them along the
        # rater's direction.
        shift = (edit_strength * emb_std.values) * edit_direction.loc[feats[rater_col]].values
        feats.iloc[:, -emb_dim:] += shift
        return feats

    gt_pred_result = rating_inference(
        _edited_features('giver', 'taker', giver_x, taker_x), gt_model)
    gt_scores = gt_pred_result.iloc[:, -1].to_numpy()

    tg_scores = None
    if edit_both:
        tg_pred_result = rating_inference(
            _edited_features('taker', 'giver', taker_x, giver_x), tg_model)
        tg_scores = tg_pred_result.iloc[:, -1].to_numpy()

    edit_lnm_df = pd.DataFrame(columns=['algorithm', 'like', 'match', 'high_rate', 'low_rate', 'gini'])
    edit_rec_df_list = []

    for i, alg_name in enumerate(rec_df['alg_type'].unique()):
        # Predictions are row-aligned with rec_df, so select an algorithm's
        # scores with its row mask.  Slicing by i * block_length silently
        # misaligns as soon as algorithms log different numbers of rows.
        alg_mask = (rec_df['alg_type'] == alg_name).to_numpy()
        edit_rec_df = rec_df.loc[alg_mask, :].copy()

        edit_rec_df['g2t_score'] = gt_scores[alg_mask]
        edit_rec_df['g2t_like'] = (edit_rec_df['g2t_score'].round() > like_threshold)
        if edit_both:
            edit_rec_df['t2g_score'] = tg_scores[alg_mask]
            edit_rec_df['t2g_like'] = (edit_rec_df['t2g_score'].round() > like_threshold)

        # Same matching rule as the un-edited simulation: mutual likes, at
        # most match_max per taker, highest t2g_score first.
        edit_rec_df['match'] = False
        liked_df = edit_rec_df[(edit_rec_df['g2t_like'] == True) & (edit_rec_df['t2g_like'] == True)]
        top_matches = liked_df.sort_values(by=['taker', 't2g_score'],
                                           ascending=[True, False]).groupby('taker').head(match_max)
        edit_rec_df.loc[top_matches.index, 'match'] = True

        like, match, high_rate, low_rate, gini_coef = sim_analysis(edit_rec_df, gender)

        edit_lnm_df.loc[i] = [alg_name, like, match, high_rate, low_rate, gini_coef]
        edit_rec_df_list.append(edit_rec_df)

    edit_log_df = pd.concat(edit_rec_df_list, ignore_index=True)

    return {'result_df': edit_lnm_df, 'rec_df': edit_log_df}

### Matching Simulation

In [None]:
# Conditions for Matching Simulation
n_choice = 2          # Number of counterparts each algorithm recommends to every user
like_threshold = 4    # A 'like' requires round(score) to be strictly greater than this
match_max = 10        # Cap on matches kept per taker (highest t2g_score first)
n_edit = 5            # Number of facial features modified in facial editing

In [None]:
# Per-direction accumulators: summary frames and full recommendation logs.
m2f_list, f2m_list = [], []
m2f_rec_list, f2m_rec_list = [], []


def _log_run(run, edit_type, result_list, rec_list):
    """Tag both frames of one run with ``edit_type`` and append them to the logs."""
    run['result_df']['edit_type'] = edit_type
    run['rec_df']['edit_type'] = edit_type
    result_list.append(run['result_df'])
    rec_list.append(run['rec_df'])


for trial in tqdm(range(10)):

    # User Sampling (seeded by the trial number)
    m_sample = m_user.sample(n=780, random_state=trial)
    f_sample = f_user.sample(n=220, random_state=trial)
    m_ids = list(m_sample.id)
    f_ids = list(f_sample.id)

    # User Ranking: percentile rank of the mean score within the sample
    # (highest mean score gets the smallest rank value).
    m_score_mean = score_mean[score_mean.index.isin(m_ids)]
    m_rank = (m_score_mean.rank(method='min', ascending=False, pct=True).sort_values() * 100).round(2)
    f_score_mean = score_mean[score_mean.index.isin(f_ids)]
    f_rank = (f_score_mean.rank(method='min', ascending=False, pct=True).sort_values() * 100).round(2)

    # Data Preparation
    # data_prep leaves a 'key' column on both samples; remove it after each call.
    m2f_f3 = data_prep(m_sample, f_sample, 'F3_M')
    m_sample, f_sample = m_sample.drop(columns="key"), f_sample.drop(columns="key")
    m2f_f3_pred = rating_inference(m2f_f3, 'F3_M')
    f2m_f3 = data_prep(f_sample, m_sample, 'F3_F')
    m_sample, f_sample = m_sample.drop(columns="key"), f_sample.drop(columns="key")
    f2m_f3_pred = rating_inference(f2m_f3, 'F3_F')

    # Simulation without Facial Editing
    # Recommending Female to Male
    m2f_og = lnm_count(m_ids, f_ids, m2f_f3_pred, f2m_f3_pred, 'm2f')
    _log_run(m2f_og, 'no_edit', m2f_list, m2f_rec_list)
    # Recommending Male to Female
    f2m_og = lnm_count(f_ids, m_ids, f2m_f3_pred, m2f_f3_pred, 'f2m')
    _log_run(f2m_og, 'no_edit', f2m_list, f2m_rec_list)

    # Exploring Preference Direction
    shap_m_avg, shap_m_ind = explore_direction('m2f', m_ids, shap_m_f3, args)
    shap_f_avg, shap_f_ind = explore_direction('f2m', f_ids, shap_f_f3, args)
    avg_edit = pd.concat([shap_m_avg, shap_f_avg])
    ind_edit = pd.concat([shap_m_ind, shap_f_ind])
    emb_std = emb.set_index('id').std()

    # Simulation with Group Preference Editing
    # Female is Recommended to Male
    m2f_avg_both = edit_simulation(gender='m2f', edit_direction=avg_edit, edit_strength=1, edit_both=True)
    _log_run(m2f_avg_both, 'avg_both', m2f_list, m2f_rec_list)
    # Male is Recommended to Female
    f2m_avg_both = edit_simulation(gender='f2m', edit_direction=avg_edit, edit_strength=1, edit_both=True)
    _log_run(f2m_avg_both, 'avg_both', f2m_list, f2m_rec_list)

    # Simulation with Individual Preference Editing
    # Female is Recommended to Male
    m2f_ind_both = edit_simulation(gender='m2f', edit_direction=ind_edit, edit_strength=1, edit_both=True)
    _log_run(m2f_ind_both, 'ind_both', m2f_list, m2f_rec_list)
    # Male is Recommended to Female
    f2m_ind_both = edit_simulation(gender='f2m', edit_direction=ind_edit, edit_strength=1, edit_both=True)
    _log_run(f2m_ind_both, 'ind_both', f2m_list, f2m_rec_list)

    # Trial Record: the last three entries of every list belong to this trial
    # (no_edit, avg_both, ind_both).
    for df in m2f_list[-3:] + f2m_list[-3:] + m2f_rec_list[-3:] + f2m_rec_list[-3:]:
        df['trial'] = trial

# Gender Record
for gender_tag, frames in (('m2f', m2f_list), ('f2m', f2m_list),
                           ('m2f', m2f_rec_list), ('f2m', f2m_rec_list)):
    for df in frames:
        df['gender'] = gender_tag

sim_result = pd.concat(m2f_list+f2m_list, ignore_index=True)
sim_log = pd.concat(m2f_rec_list+f2m_rec_list, ignore_index=True)