In [None]:
import os
from glob import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# NOTE(review): `train_test_split` is called in later cells but is not imported in any
# visible cell; presumably `from sklearn.model_selection import train_test_split` -- confirm.

In [None]:
from matplotlib import rc
# Global matplotlib font settings for the figures drawn in this notebook:
# serif family, 'Times New Roman' as the serif face, base font size 12.
# The commented-out line below is a sans-serif/Helvetica alternative.
#rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
rc('font',**{'family':'serif',
             'serif':['Times New Roman'],
             'size': 12,
            })

In [None]:
def compute_ranking_accuracy(df_, margin=0):
    """Score pairwise rank predictions against ``label_r`` using a margin.

    A row is counted as correct when

    * ``label_r == -1`` and ``rank_left - rank_right > margin``;
    * ``label_r ==  1`` and ``rank_right - rank_left > margin``;
    * ``label_r ==  0`` and ``|rank_left - rank_right| < margin``.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must provide the columns ``label_r``, ``rank_left`` and ``rank_right``.
    margin : float, default 0
        Threshold used in the three rules above. With the default of 0 no tie
        row can be counted as correct (the tie comparison is strict).

    Returns
    -------
    tuple
        ``(nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy,
        tie_accuracy, overall_accuracy)``, each a fraction of the matching
        group of rows. A group with no rows yields NaN.
    """
    rank_gap = df_.rank_left - df_.rank_right

    is_left = df_.label_r == -1
    is_right = df_.label_r == 1
    is_tie = df_.label_r == 0
    is_nontie = df_.label_r != 0

    hit_left = is_left & (rank_gap > margin)
    hit_right = is_right & (-rank_gap > margin)
    hit_tie = is_tie & (rank_gap.abs() < margin)

    # Non-ties accuracy
    correct_left = hit_left.sum()
    correct_right = hit_right.sum()
    nontie_left_accuracy = correct_left / is_left.sum()
    nontie_right_accuracy = correct_right / is_right.sum()
    nontie_accuracy = (correct_left + correct_right) / is_nontie.sum()

    # Ties accuracy
    tie_accuracy = hit_tie.sum() / is_tie.sum()

    # Overall accuracy: computed from the frame that was passed in, not from
    # whatever global test split happens to exist in the notebook.
    overall_accuracy = (hit_left | hit_right | hit_tie).sum() / len(df_)

    return nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy

In [None]:
def compute_ranking_accuracy_nomargin(df_,):
    """Accuracy of the rank ordering on non-tie rows, with no margin applied.

    Rows with ``label_r == 0`` are ignored. A ``label_r == -1`` row is correct
    when ``rank_left`` is strictly greater than ``rank_right``; a
    ``label_r == 1`` row is correct when ``rank_right`` is strictly greater.

    Returns
    -------
    tuple
        ``(nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy)``.
    """
    nonties = df_[df_.label_r != 0]
    rank_gap = nonties.rank_left - nonties.rank_right

    labelled_left = nonties.label_r == -1
    labelled_right = nonties.label_r == 1

    hits_left = (labelled_left & (rank_gap > 0)).sum()
    hits_right = (labelled_right & (rank_gap < 0)).sum()

    return (
        hits_left / labelled_left.sum(),
        hits_right / labelled_right.sum(),
        (hits_left + hits_right) / nonties.shape[0],
    )

In [None]:
def compute_ranking_distance(df_):
    """Mean absolute rank difference, reported separately for non-ties and ties.

    Returns
    -------
    tuple
        ``(avg_dist_nonties, avg_dist_ties)``: the mean of
        ``|rank_left - rank_right|`` over rows with ``label_r != 0`` and over
        rows with ``label_r == 0`` respectively.
    """
    tie_mask = df_.label_r == 0
    abs_gap = (df_.rank_left - df_.rank_right).abs()
    return abs_gap[~tie_mask].mean(), abs_gap[tie_mask].mean()

In [None]:
def compute_ranking_distance_all(df_):
    """Mean absolute rank difference over all rows (non-ties followed by ties).

    Returns
    -------
    float
        Mean of ``|rank_left - rank_right|`` over the concatenation of the
        non-tie rows (``label_r != 0``) and the tie rows (``label_r == 0``).
    """
    tie_mask = df_.label_r == 0
    # Keep the non-ties-then-ties ordering of the concatenated array.
    groups = (df_[~tie_mask], df_[tie_mask])
    abs_gaps = [np.abs(part.rank_left - part.rank_right).to_numpy() for part in groups]
    return np.concatenate(abs_gaps).mean()

In [None]:
def compute_ranking_distance_sum(df_):
    """Total absolute rank difference, reported separately for non-ties and ties.

    Returns
    -------
    tuple
        ``(sum_dist_nonties, sum_dist_ties)``: the sum of
        ``|rank_left - rank_right|`` over rows with ``label_r != 0`` and over
        rows with ``label_r == 0`` respectively.
    """
    tie_mask = df_.label_r == 0
    abs_gap = (df_.rank_left - df_.rank_right).abs()
    return abs_gap[~tie_mask].sum(), abs_gap[tie_mask].sum()

In [None]:
def compute_classification_accuracy(df_):
    """Accuracy of the classification head, from its logits and ``label_c``.

    The predicted class is the argmax of the softmax over the logit columns:
    ``logits_l, logits_0, logits_r`` when ``logits_0`` is present (class 1 is
    then the tie class), otherwise ``logits_l, logits_r``.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must provide ``label_c`` plus the logit columns listed above. Only
        this frame is used; rows are matched to predictions by position.

    Returns
    -------
    tuple
        ``(all_accuracy, tie_accuracy, nontie_accuracy)``. Without a
        ``logits_0`` column ``tie_accuracy`` is 0 and ``nontie_accuracy``
        equals ``all_accuracy``.
    """
    def softmax(x):
        # Row-wise softmax; subtracting the row maximum avoids overflow in exp().
        e_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return e_x / e_x.sum(axis=1, keepdims=True)

    with_ties = 'logits_0' in df_.columns
    if with_ties:
        logit_columns = ['logits_l', 'logits_0', 'logits_r']
    else:
        logit_columns = ['logits_l', 'logits_r']

    logits = np.column_stack([df_[column].values for column in logit_columns])
    class_predicted = np.argmax(softmax(logits), axis=1)

    # Compare positionally against the labels of the frame that was passed in
    # (not against a global frame, which may hold different rows).
    labels = df_['label_c'].to_numpy()
    correct = class_predicted == labels
    n_rows = len(df_)

    all_accuracy = correct.sum() / n_rows
    if not with_ties:
        return all_accuracy, 0, all_accuracy

    is_tie = labels == 1
    # NOTE(review): both values are divided by the total row count, so they add
    # up to all_accuracy rather than being per-class accuracies -- confirm intent.
    tie_accuracy = (correct & is_tie).sum() / n_rows
    nontie_accuracy = (correct & ~is_tie).sum() / n_rows

    return all_accuracy, tie_accuracy, nontie_accuracy

# Margins Analysis

## Trained with ties

In [None]:
# Evaluate every saved result file of the models trained with ties (one file per margin)
# on its held-out test split and collect one summary row per margin.
model_results = glob('../outputs/saved/margin-*.pkl')
compiled_results = []
for model_result in model_results:
    # NOTE(review): unpickling can execute arbitrary code; only load trusted result files.
    df = pd.read_pickle(model_result)
    # The margin is encoded in the file name: 'margin-<value>.pt_results.pkl'.
    margin_ = float(os.path.basename(model_result).replace('margin-', '').replace('.pt_results.pkl', ''))
    seed = 30
    print('Margin:', margin_, '-->', model_result)

    # Seeded two-stage split; only X_test is scored below.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # Ranking sub-network
    nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy = compute_ranking_accuracy(X_test, margin=margin_)

    # Ranking sub-network, without any margin on accuracy
    nontie_left_accuracy_nomargin, nontie_right_accuracy_nomargin, nontie_accuracy_nomargin = compute_ranking_accuracy_nomargin(X_test)

    # Classification sub-network
    c_all_accuracy, c_tie_accuracy, c_nontie_accuracy = compute_classification_accuracy(X_test)

    # Rank difference: means and totals come from two different helpers.
    avg_dist_nonties, avg_dist_ties = compute_ranking_distance(X_test)
    sum_dist_nonties, sum_dist_ties = compute_ranking_distance_sum(X_test)

    # Compile results
    result = {
        'margin': margin_,
        'seed': seed,
        # Ranking, with margins
        'ranking_acc': overall_accuracy,
        'ranking_acc_nonties': nontie_accuracy,
        'ranking_acc_ties': tie_accuracy,
        'ranking_acc_left': nontie_left_accuracy,
        'ranking_acc_right': nontie_right_accuracy,
        # Ranking, without margins
        'ranking_acc_nonties_nomargin': nontie_accuracy_nomargin,
        'ranking_acc_left_nomargin': nontie_left_accuracy_nomargin,
        'ranking_acc_right_nomargin': nontie_right_accuracy_nomargin,
        # Classification
        'classification_acc': c_all_accuracy,
        'classification_acc_nonties': c_nontie_accuracy,
        'classification_acc_ties': c_tie_accuracy,
        # Rank difference
        'avg_dist_nonties': avg_dist_nonties,
        'sum_dist_nonties': sum_dist_nonties,
        'avg_dist_ties': avg_dist_ties,
        'sum_dist_ties': sum_dist_ties,
    }
    compiled_results.append(result)

results_df = pd.DataFrame(compiled_results)

In [None]:
# Summary table for the models trained with ties, ordered by margin; displayed as the cell output.
df_withties = results_df.sort_values(by='margin')
df_withties

## Trained without ties

In [None]:
# Evaluate every saved result file of the models trained without ties (one file per margin)
# on its held-out test split and collect one summary row per margin.
model_results = glob('../outputs/saved/_noties-margin-*.pkl')
compiled_results = []
for model_result in model_results:
    # NOTE(review): unpickling can execute arbitrary code; only load trusted result files.
    df = pd.read_pickle(model_result)
    # The margin is encoded in the file name: '_noties-margin-<value>.pt_results.pkl'.
    margin_ = float(os.path.basename(model_result).replace('_noties-margin-', '').replace('.pt_results.pkl', ''))
    seed = 30
    print('Margin:', margin_, '-->', model_result)

    # Seeded two-stage split; only X_test is scored below.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # Ranking sub-network
    nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy = compute_ranking_accuracy(X_test, margin=margin_)

    # Ranking sub-network, without any margin on accuracy
    nontie_left_accuracy_nomargin, nontie_right_accuracy_nomargin, nontie_accuracy_nomargin = compute_ranking_accuracy_nomargin(X_test)

    # Classification sub-network
    c_all_accuracy, c_tie_accuracy, c_nontie_accuracy = compute_classification_accuracy(X_test)

    # Rank difference: means and totals come from two different helpers.
    avg_dist_nonties, avg_dist_ties = compute_ranking_distance(X_test)
    sum_dist_nonties, sum_dist_ties = compute_ranking_distance_sum(X_test)

    # Compile results
    result = {
        'margin': margin_,
        'seed': seed,
        # Ranking, with margins
        'ranking_acc': overall_accuracy,
        'ranking_acc_nonties': nontie_accuracy,
        'ranking_acc_ties': tie_accuracy,
        'ranking_acc_left': nontie_left_accuracy,
        'ranking_acc_right': nontie_right_accuracy,
        # Ranking, without margins
        'ranking_acc_nonties_nomargin': nontie_accuracy_nomargin,
        'ranking_acc_left_nomargin': nontie_left_accuracy_nomargin,
        'ranking_acc_right_nomargin': nontie_right_accuracy_nomargin,
        # Classification
        'classification_acc': c_all_accuracy,
        'classification_acc_nonties': c_nontie_accuracy,
        'classification_acc_ties': c_tie_accuracy,
        # Rank difference
        'avg_dist_nonties': avg_dist_nonties,
        'sum_dist_nonties': sum_dist_nonties,
        'avg_dist_ties': avg_dist_ties,
        'sum_dist_ties': sum_dist_ties,
    }
    compiled_results.append(result)

results_df = pd.DataFrame(compiled_results)

In [None]:
# Summary table for the models trained without ties, ordered by margin; displayed as the cell output.
df_noties = results_df.sort_values(by='margin')
df_noties

In [None]:
# Ranking accuracy against the margin: margin-aware overall accuracy (solid) and
# no-margin non-tie accuracy (dashed) for both training regimes.
ax = plt.figure(figsize=(5, 3.5)).add_subplot(111)
for frame, column, fmt, label in (
    (df_withties, 'ranking_acc', 'k', 'Train w/ ties. Acc. w/ margin'),
    (df_withties, 'ranking_acc_nonties_nomargin', 'k--', 'Trained w/ ties. Acc. w/o margin'),
    (df_noties, 'ranking_acc', 'r', 'Train w/o ties. Acc. w/ margin'),
    (df_noties, 'ranking_acc_nonties_nomargin', 'r--', 'Trained w/o ties. Acc. w/o margin'),
):
    ax.plot(frame.margin, frame[column], fmt, label=label)
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel('Accuracy (%)')
ax.legend(fontsize="9")
plt.show()

In [None]:
# No-margin accuracy on non-tie comparisons against the margin, one curve per training regime.
fig = plt.figure(figsize=(5,3.5))
plt.plot(df_withties.margin, df_withties.ranking_acc_nonties_nomargin, 'k', label='Trained w/ ties.', )
plt.plot(df_noties.margin, df_noties.ranking_acc_nonties_nomargin, 'r',     label='Trained w/o ties.', )

# Dashed horizontal line: the value `qq.ranking_acc_nonties[0]` repeated 21 times over `qq.margin`.
# NOTE(review): `qq` is not defined in any visible cell, so this line fails on a fresh kernel;
# presumably a reference results table with 21 margins -- confirm and define it explicitly.
plt.plot(qq.margin, [qq.ranking_acc_nonties[0] for i in range(0, 21, 1)], 'b--',     )

plt.xlabel(r'$\gamma$')
plt.ylabel('Accuracy (%)')
plt.legend(fontsize="11")
plt.show()
# Written to the current working directory at the figure's own DPI.
fig.savefig('accuracy_nomargin.png', dpi=fig.dpi, bbox_inches="tight")

In [None]:
# Mean gap in no-margin non-tie accuracy between the two training regimes, restricted to
# margins above 0.4. Both series are re-indexed 0..n-1 so they subtract position by position.
acc_withties = df_withties.loc[df_withties.margin > 0.4, 'ranking_acc_nonties_nomargin'].reset_index(drop=True)
acc_noties = df_noties.loc[df_noties.margin > 0.4, 'ranking_acc_nonties_nomargin'].reset_index(drop=True)
print('Average accuracy difference between trained with and without ties:', (acc_withties - acc_noties).mean())

In [None]:
# Margin-aware ranking accuracy against the margin for both training regimes.
fig, ax = plt.subplots(figsize=(5, 3.5))
# The with-ties curve carries no label, so only the without-ties curve shows up in the legend.
ax.plot(df_withties.margin, df_withties.ranking_acc, 'k')
ax.plot(df_noties.margin, df_noties.ranking_acc, 'r', label='Trained w/o ties. ')
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel('Accuracy (%)')
ax.legend(fontsize="11")
plt.show()
fig.savefig('accuracy_margin.png', dpi=fig.dpi, bbox_inches="tight")

In [None]:
# `avg_dist_ties` (solid) and `avg_dist_nonties` (dashed) against the margin, for both training regimes.
ax = plt.figure(figsize=(5, 3)).add_subplot(111)
for frame, column, fmt, label in (
    (df_withties, 'avg_dist_ties', 'k', 'Train w/ ties. Tie obs.'),
    (df_withties, 'avg_dist_nonties', 'k--', 'Train w/ ties. Non-tie obs.'),
    (df_noties, 'avg_dist_ties', 'r', 'Train w/o ties. Ties obs.'),
    (df_noties, 'avg_dist_nonties', 'r--', 'Train w/o ties. Non-ties obs.'),
):
    ax.plot(frame.margin, frame[column], fmt, label=label)
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel('Average Rank Difference')
ax.legend(fontsize="9")
plt.show()

In [None]:
# `avg_dist_ties` and `avg_dist_nonties` of the models trained with ties, against the margin.
fig, ax = plt.subplots(figsize=(5, 3))
ax.plot(df_withties.margin, df_withties.avg_dist_ties, 'k', label='Ties')
ax.plot(df_withties.margin, df_withties.avg_dist_nonties, 'k--', label='Non-ties')
# Unlabelled dotted diagonal from (0, 0) to (2, 2), i.e. rank difference equal to the margin.
ax.plot([0., 2.], [0., 2.], 'b:')
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel('Average Rank Difference\n in Comparisons')
ax.legend(fontsize="9")
plt.show()
fig.savefig('rank_difference.png', dpi=fig.dpi, bbox_inches="tight")

In [None]:
# Ratio `sum_dist_ties / sum_dist_nonties` against the margin, one curve per training regime.
# Each curve gets its own legend label (previously both were labelled identically).
plt.figure(figsize=(5,3))
plt.plot(df_withties.margin, df_withties.sum_dist_ties / df_withties.sum_dist_nonties, 'k', label='Train w/ ties.')
plt.plot(df_noties.margin, df_noties.sum_dist_ties / df_noties.sum_dist_nonties, 'r', label='Train w/o ties.')
plt.xlabel(r'$\gamma$')
plt.ylabel('Tie / Non-tie Rank Difference Ratio')
plt.legend(fontsize="9")
plt.show()

# What happens with wrongly classified observations?

## With ties

In [None]:
# For every model trained with ties: collect rank-distance statistics of the test
# comparisons that the ranking sub-network got wrong ("errados" = misclassified).
compiled_results = []
for model_result in glob('../outputs/saved/margin-*.pkl'):
    # NOTE(review): unpickling can execute arbitrary code; only load trusted result files.
    df = pd.read_pickle(model_result)
    # The margin is encoded in the file name: 'margin-<value>.pt_results.pkl'.
    margin = float(os.path.basename(model_result).replace('margin-', '').replace('.pt_results.pkl', ''))
    seed = 30
    print('Margin:', margin, '-->', model_result)

    # Seeded two-stage split; only X_test is analysed below.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # One (image, rank) record per distinct image, taken from the first row in which the
    # image appears on the left and on the right; identical records are then merged.
    left_ranks = (X_test.drop_duplicates(subset='image_left')[['image_left', 'rank_left']]
                  .rename(columns={'image_left': 'image', 'rank_left': 'rank'}))
    right_ranks = (X_test.drop_duplicates(subset='image_right')[['image_right', 'rank_right']]
                   .rename(columns={'image_right': 'image', 'rank_right': 'rank'}))
    # `ties=True`: this cell only loads the with-ties result files. The margin recorded is
    # the one of the file being processed (not a variable left over from an earlier cell).
    image_ranks = (pd.concat([left_ranks, right_ranks], ignore_index=True)
                   .assign(ties=True, margin=margin)
                   .drop_duplicates())
    std = image_ranks['rank'].std()
    min_max = image_ranks['rank'].max() - image_ranks['rank'].min()

    # Split in non-ties and ties
    df_nonties = X_test[X_test.label_r != 0].copy()
    df_ties = X_test[X_test.label_r == 0].copy()

    # Same correctness rules as compute_ranking_accuracy, stored per row.
    df_nonties['correct_left'] = ((df_nonties.label_r == -1) & (df_nonties.rank_left - df_nonties.rank_right > margin))
    df_nonties['correct_right'] = ((df_nonties.label_r == 1) & (df_nonties.rank_right - df_nonties.rank_left > margin))
    df_nonties['correct_nontie'] = df_nonties['correct_left'] | df_nonties['correct_right']
    df_ties['correct_tie'] = (df_ties.rank_left - df_ties.rank_right).abs() < margin

    # Keep only the misclassified comparisons.
    errados_nontie = df_nonties[~df_nonties['correct_nontie']]
    errados_tie    = df_ties[~df_ties['correct_tie']]
    errados_all    = pd.concat((errados_nontie, errados_tie))

    avg_dist_nonties, _ = compute_ranking_distance(errados_nontie)
    _, avg_dist_ties_ = compute_ranking_distance(errados_tie)
    avg_dist_all = compute_ranking_distance_all(errados_all)

    compiled_results.append({
        'margin': margin,
        'avg_dist_nonties': avg_dist_nonties,
        'avg_dist_ties': avg_dist_ties_,
        'avg_dist_all': avg_dist_all,
        'std': std,
        'min_max': min_max,
    })

df_errados_withties = pd.DataFrame(compiled_results).sort_values(['margin'])

## Without ties

In [None]:
# For every model trained without ties: collect rank-distance statistics of the test
# comparisons that the ranking sub-network got wrong ("errados" = misclassified).
compiled_results = []
for model_result in glob('../outputs/saved/_noties-margin-*.pkl'):
    # NOTE(review): unpickling can execute arbitrary code; only load trusted result files.
    df = pd.read_pickle(model_result)
    # The margin is encoded in the file name: '_noties-margin-<value>.pt_results.pkl'.
    margin = float(os.path.basename(model_result).replace('_noties-margin-', '').replace('.pt_results.pkl', ''))
    seed = 30
    print('Margin:', margin, '-->', model_result)

    # Seeded two-stage split; only X_test is analysed below.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # One (image, rank) record per distinct image, taken from the first row in which the
    # image appears on the left and on the right; identical records are then merged.
    left_ranks = (X_test.drop_duplicates(subset='image_left')[['image_left', 'rank_left']]
                  .rename(columns={'image_left': 'image', 'rank_left': 'rank'}))
    right_ranks = (X_test.drop_duplicates(subset='image_right')[['image_right', 'rank_right']]
                   .rename(columns={'image_right': 'image', 'rank_right': 'rank'}))
    # `ties=False`: this cell only loads the without-ties result files. The margin recorded
    # is the one of the file being processed (not a variable left over from an earlier cell).
    image_ranks = (pd.concat([left_ranks, right_ranks], ignore_index=True)
                   .assign(ties=False, margin=margin)
                   .drop_duplicates())
    std = image_ranks['rank'].std()
    min_max = image_ranks['rank'].max() - image_ranks['rank'].min()

    # Split in non-ties and ties
    df_nonties = X_test[X_test.label_r != 0].copy()
    df_ties = X_test[X_test.label_r == 0].copy()

    # Same correctness rules as compute_ranking_accuracy, stored per row.
    df_nonties['correct_left'] = ((df_nonties.label_r == -1) & (df_nonties.rank_left - df_nonties.rank_right > margin))
    df_nonties['correct_right'] = ((df_nonties.label_r == 1) & (df_nonties.rank_right - df_nonties.rank_left > margin))
    df_nonties['correct_nontie'] = df_nonties['correct_left'] | df_nonties['correct_right']
    df_ties['correct_tie'] = (df_ties.rank_left - df_ties.rank_right).abs() < margin

    # Keep only the misclassified comparisons.
    errados_nontie = df_nonties[~df_nonties['correct_nontie']]
    errados_tie    = df_ties[~df_ties['correct_tie']]
    errados_all    = pd.concat((errados_nontie, errados_tie))

    avg_dist_nonties, _ = compute_ranking_distance(errados_nontie)
    _, avg_dist_ties_ = compute_ranking_distance(errados_tie)
    avg_dist_all = compute_ranking_distance_all(errados_all)

    compiled_results.append({
        'margin': margin,
        'avg_dist_nonties': avg_dist_nonties,
        'avg_dist_ties': avg_dist_ties_,
        'avg_dist_all': avg_dist_all,
        'std': std,
        'min_max': min_max,
    })

df_errados_noties = pd.DataFrame(compiled_results).sort_values(['margin'])

## Plot

### Average Absolute Error

In [None]:
# |margin - avg_dist_all| of the misclassified comparisons against the margin.
# The first row of each (margin-sorted) table is left out of the plot.
fig, ax = plt.subplots(figsize=(5, 3.5))
for table, fmt, label in (
    (df_errados_withties, 'k', 'Trained w/ ties.'),
    (df_errados_noties, 'r', 'Trained w/o ties.'),
):
    abs_error = (table.margin - table.avg_dist_all).abs()
    ax.plot(table.margin.iloc[1:], abs_error.iloc[1:], fmt, label=label)
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel('Average Absolute Error')
ax.legend(fontsize="11")
plt.show()
fig.savefig('error_misclassified_margin.png', dpi=fig.dpi, bbox_inches="tight")

### Average Absolute Error/Margin

In [None]:
# |margin - avg_dist_all| divided by the margin, for the misclassified comparisons.
# 1e-14 is added to the denominator so a margin of 0 does not divide by zero.
fig, ax = plt.subplots(figsize=(5, 3.5))
for table, fmt, label in (
    (df_errados_withties, 'k', 'Trained w/ ties.'),
    (df_errados_noties, 'r', 'Trained w/o ties.'),
):
    relative_error = (table.margin - table.avg_dist_all).abs() / (table['margin'] + 1e-14)
    ax.plot(table.margin, relative_error, fmt, label=label)
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel(r'Average Absolute Error / $\gamma$')
ax.set_ylim([0, 5])
ax.legend(fontsize="11")
plt.show()
# NOTE(review): same file name as the absolute-error figure saved in the previous cell,
# so this overwrites it -- confirm whether a separate file name was intended.
fig.savefig('error_misclassified_margin.png', dpi=fig.dpi, bbox_inches="tight")

In [None]:
# |margin - average rank distance| of the misclassified comparisons, split into
# non-ties (solid) and ties (dashed) for both training regimes.
fig = plt.figure(figsize=(5,3.5))
plt.plot(df_errados_withties.margin, (df_errados_withties.margin - df_errados_withties.avg_dist_nonties).abs(), 'k', label='Trained w/ ties. Non-ties ')
plt.plot(df_errados_withties.margin, (df_errados_withties.margin - df_errados_withties.avg_dist_ties   ).abs(), 'k--',  label='Trained w/ ties. Ties', )

plt.plot(df_errados_noties.margin,   (df_errados_noties.margin - df_errados_noties.avg_dist_nonties).abs(), 'r',   label='Trained w/o ties. Non-ties', )
plt.plot(df_errados_noties.margin,   (df_errados_noties.margin - df_errados_noties.avg_dist_ties   ).abs(), 'r--',    label='Trained w/o ties. Ties', )
plt.xlabel(r'$\gamma$')
plt.ylabel('Absolute Error')
plt.legend(fontsize="11")
plt.show()

In [None]:
# Average rank distance of the misclassified comparisons against the margin,
# split into non-ties (solid) and ties (dashed) for both training regimes.
fig, ax = plt.subplots(figsize=(5, 3.5))
for table, column, fmt, label in (
    (df_errados_withties, 'avg_dist_nonties', 'k', 'Trained w/ ties. Non-ties '),
    (df_errados_withties, 'avg_dist_ties', 'k--', 'Trained w/ ties. Ties'),
    (df_errados_noties, 'avg_dist_nonties', 'r', 'Trained w/o ties. Non-ties'),
    (df_errados_noties, 'avg_dist_ties', 'r--', 'Trained w/o ties. Ties'),
):
    ax.plot(table.margin, table[column], fmt, label=label)
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel('Rank Difference')
ax.legend(fontsize="11")
plt.show()

# Balanced ties and non-ties (0.33 for each class at test time)

## With ties

In [None]:
# Same evaluation as the unbalanced run for the models trained with ties, but on a
# re-balanced test set (600 sampled non-tie rows plus every tie row).
model_results = glob('../outputs/saved/margin-*.pkl')
compiled_results = []
for model_result in model_results:
    # NOTE(review): unpickling can execute arbitrary code; only load trusted result files.
    df = pd.read_pickle(model_result)
    # The margin is encoded in the file name: 'margin-<value>.pt_results.pkl'.
    margin_ = float(os.path.basename(model_result).replace('margin-', '').replace('.pt_results.pkl', ''))
    seed = 30
    print('Margin:', margin_, '-->', model_result)

    # Seeded two-stage split; only X_test is scored below.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # Re-balance: a seeded sample of 600 non-tie rows followed by all tie rows, re-indexed 0..n-1.
    # `sample` raises if the test split holds fewer than 600 non-tie rows.
    X_test = pd.concat((X_test[(X_test.label_r == -1) | (X_test.label_r == 1)].sample(600, random_state=seed), X_test[(X_test.label_r == 0)])).reset_index(drop=True)

    # Ranking sub-network
    nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy = compute_ranking_accuracy(X_test, margin=margin_)

    # Ranking sub-network, without any margin on accuracy
    nontie_left_accuracy_nomargin, nontie_right_accuracy_nomargin, nontie_accuracy_nomargin = compute_ranking_accuracy_nomargin(X_test)

    # Classification sub-network
    c_all_accuracy, c_tie_accuracy, c_nontie_accuracy = compute_classification_accuracy(X_test)

    # Rank difference: means and totals come from two different helpers.
    avg_dist_nonties, avg_dist_ties = compute_ranking_distance(X_test)
    sum_dist_nonties, sum_dist_ties = compute_ranking_distance_sum(X_test)

    # Compile results
    result = {
        'margin': margin_,
        'seed': seed,
        # Ranking, with margins
        'ranking_acc': overall_accuracy,
        'ranking_acc_nonties': nontie_accuracy,
        'ranking_acc_ties': tie_accuracy,
        'ranking_acc_left': nontie_left_accuracy,
        'ranking_acc_right': nontie_right_accuracy,
        # Ranking, without margins
        'ranking_acc_nonties_nomargin': nontie_accuracy_nomargin,
        'ranking_acc_left_nomargin': nontie_left_accuracy_nomargin,
        'ranking_acc_right_nomargin': nontie_right_accuracy_nomargin,
        # Classification
        'classification_acc': c_all_accuracy,
        'classification_acc_nonties': c_nontie_accuracy,
        'classification_acc_ties': c_tie_accuracy,
        # Rank difference
        'avg_dist_nonties': avg_dist_nonties,
        'sum_dist_nonties': sum_dist_nonties,
        'avg_dist_ties': avg_dist_ties,
        'sum_dist_ties': sum_dist_ties,
    }
    compiled_results.append(result)

results_df = pd.DataFrame(compiled_results)

In [None]:
# Balanced-test-set summary for the models trained with ties, ordered by margin.
aa = results_df.sort_values(by='margin')

## Without ties

In [None]:
# Same evaluation as the unbalanced run for the models trained without ties, but on a
# re-balanced test set (600 sampled non-tie rows plus every tie row).
model_results = glob('../outputs/saved/_noties-margin-*.pkl')
compiled_results = []
for model_result in model_results:
    # NOTE(review): unpickling can execute arbitrary code; only load trusted result files.
    df = pd.read_pickle(model_result)
    # The margin is encoded in the file name: '_noties-margin-<value>.pt_results.pkl'.
    margin_ = float(os.path.basename(model_result).replace('_noties-margin-', '').replace('.pt_results.pkl', ''))
    seed = 30
    print('Margin:', margin_, '-->', model_result)

    # Seeded two-stage split; only X_test is scored below.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # Re-balance: a seeded sample of 600 non-tie rows followed by all tie rows, re-indexed 0..n-1.
    # `sample` raises if the test split holds fewer than 600 non-tie rows.
    X_test = pd.concat((X_test[(X_test.label_r == -1) | (X_test.label_r == 1)].sample(600, random_state=seed), X_test[(X_test.label_r == 0)])).reset_index(drop=True)

    # Ranking sub-network
    nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy = compute_ranking_accuracy(X_test, margin=margin_)

    # Ranking sub-network, without any margin on accuracy
    nontie_left_accuracy_nomargin, nontie_right_accuracy_nomargin, nontie_accuracy_nomargin = compute_ranking_accuracy_nomargin(X_test)

    # Classification sub-network
    c_all_accuracy, c_tie_accuracy, c_nontie_accuracy = compute_classification_accuracy(X_test)

    # Rank difference: means and totals come from two different helpers.
    avg_dist_nonties, avg_dist_ties = compute_ranking_distance(X_test)
    sum_dist_nonties, sum_dist_ties = compute_ranking_distance_sum(X_test)

    # Compile results
    result = {
        'margin': margin_,
        'seed': seed,
        # Ranking, with margins
        'ranking_acc': overall_accuracy,
        'ranking_acc_nonties': nontie_accuracy,
        'ranking_acc_ties': tie_accuracy,
        'ranking_acc_left': nontie_left_accuracy,
        'ranking_acc_right': nontie_right_accuracy,
        # Ranking, without margins
        'ranking_acc_nonties_nomargin': nontie_accuracy_nomargin,
        'ranking_acc_left_nomargin': nontie_left_accuracy_nomargin,
        'ranking_acc_right_nomargin': nontie_right_accuracy_nomargin,
        # Classification
        'classification_acc': c_all_accuracy,
        'classification_acc_nonties': c_nontie_accuracy,
        'classification_acc_ties': c_tie_accuracy,
        # Rank difference
        'avg_dist_nonties': avg_dist_nonties,
        'sum_dist_nonties': sum_dist_nonties,
        'avg_dist_ties': avg_dist_ties,
        'sum_dist_ties': sum_dist_ties,
    }
    compiled_results.append(result)

results_df = pd.DataFrame(compiled_results)

In [None]:
# Balanced-test-set summary for the models trained without ties, ordered by margin.
bb = results_df.sort_values(by='margin')

In [None]:
# Margin-aware ranking accuracy on the balanced test set, one curve per training regime.
fig, ax = plt.subplots(figsize=(5, 3.5))
for table, fmt, label in (
    (aa, 'k', 'Trained w/ ties. '),
    (bb, 'r', 'Trained w/o ties. '),
):
    ax.plot(table.margin, table.ranking_acc, fmt, label=label)
ax.set_xlabel(r'$\gamma$')
ax.set_ylabel('Accuracy (%)')
ax.legend(fontsize="11")
plt.show()
# NOTE(review): same file name as the unbalanced accuracy figure saved earlier in the
# notebook, so this overwrites it -- confirm whether a separate file name was intended.
fig.savefig('accuracy_margin.png', dpi=fig.dpi, bbox_inches="tight")

# Distribution of Ranks

## Margin=0.7

### Trained without ties

In [None]:
# Load the first without-ties result file whose name matches margin 0.7 and add the
# signed rank difference (left minus right) as `rank_diff`.
# Note: this rebinds `df_noties`, which earlier cells use for the per-margin summary table.
model_result = glob('../outputs/saved/_noties-margin-0.7*.pkl')[0]
print('Loading model:', model_result)
df_noties = pd.read_pickle(model_result)
df_noties['rank_diff'] = df_noties['rank_left'] - df_noties['rank_right']

In [None]:
# Density histogram (25 bins) of the signed rank difference over all rows of the
# without-ties model, with a vertical line at the margin value 0.7.
fig, ax = plt.subplots(figsize=(3, 1.5))
ax.hist(df_noties.rank_diff, bins=25, density=True)
ax.axvline(x=.7, color='k', label=r'$\gamma$')
ax.set_title(r'Rank difference density ($\gamma$=.7). All data')
plt.show()

In [None]:
# Density histograms of the signed rank difference for tie and non-tie rows of the
# without-ties model, with vertical lines at +/- the margin value 0.7.
fig = plt.figure(figsize=(3,1.5))
plt.hist(df_noties[df_noties.label_r == 0].rank_diff, 25, density=True, color='green', alpha=1, label='Ties')
plt.hist(df_noties[df_noties.label_r != 0].rank_diff, 25, density=True, color='red', alpha=0.7, label='Non-ties')
# Only one of the two margin lines is labelled, so the legend shows a single margin entry.
plt.axvline(x=.7, color='k', label=r'$|\gamma|$=0.7')
plt.axvline(x=-.7, color='k')
plt.legend(fontsize="9")
plt.show()

### Trained with ties

In [None]:
# Load the first with-ties result file whose name matches margin 0.7 and add the
# signed rank difference (left minus right) as `rank_diff`.
model_result = glob('../outputs/saved/margin-0.7*.pkl')[0]
print('Loading model:', model_result)
df_ties = pd.read_pickle(model_result)
df_ties['rank_diff'] = df_ties['rank_left'] - df_ties['rank_right']

In [None]:
# Density histogram (25 bins) of the signed rank difference over all rows of the
# with-ties model, with a vertical line at the margin value 0.7.
fig, ax = plt.subplots(figsize=(3, 1.5))
ax.hist(df_ties.rank_diff, bins=25, density=True)
ax.axvline(x=.7, color='k', label=r'$\gamma$')
ax.set_title(r'Rank difference density ($\gamma$=.7). All data')
plt.show()

In [None]:
# Density histograms of the signed rank difference for tie and non-tie rows of the
# with-ties model, with vertical lines at +/- the margin value 0.7.
fig = plt.figure(figsize=(3,1.5))
plt.hist(df_ties[df_ties.label_r == 0].rank_diff, 25, density=True, color='green', alpha=1, label='Ties')
plt.hist(df_ties[df_ties.label_r != 0].rank_diff, 25, density=True, color='red', alpha=0.7, label='Non-ties')
# Only one of the two margin lines is labelled, so the legend shows a single margin entry.
plt.axvline(x=.7, color='k', label=r'$|\gamma|$=0.7')
plt.axvline(x=-.7, color='k')
plt.legend(fontsize="9")
# `rank_diff` is the signed difference (rank_left - rank_right), not its absolute value.
plt.xlabel('Rank difference')
plt.ylabel('Density')
plt.show()

### Comparison ties vs. non-ties

In [None]:
# Relative-frequency histograms of the signed rank difference, split by pair
# label, for the model trained without ties (top) and with ties (bottom).
fig, axs = plt.subplots(2, 1, figsize=(5, 3.5))
bin_edges = np.arange(-6., 6.5, 0.3)

# One entry per histogram layer: (label_r selector, colour, opacity, legend text).
# NOTE(review): compute_ranking_accuracy treats label_r == -1 as the "left"
# outcome, whereas label_r > 0 is captioned 'Left' here -- confirm which is intended.
layers = [
    (lambda lab: lab == 0, 'blue',   1,   'Tie'),
    (lambda lab: lab > 0,  'purple', 0.7, 'Left'),
    (lambda lab: lab < 0,  'orange', 0.7, 'Right'),
]
# One entry per panel: (axes, data, title, whether to draw the legend).
panels = [
    (axs[0], df_noties, 'Trained w/o Ties', True),
    (axs[1], df_ties,   'Trained w/ Ties',  False),
]

for panel_ax, frame, panel_title, with_legend in panels:
    for select, colour, opacity, legend_text in layers:
        values = frame.loc[select(frame.label_r), 'rank_diff']
        # Weights of 1/N make the bar heights of each subset sum to one.
        panel_ax.hist(values, bins=bin_edges, color=colour, alpha=opacity, label=legend_text,
                      weights=np.ones_like(values) / len(values))
    # Margin on both sides of zero; only one line is labelled for the legend.
    panel_ax.axvline(x=.7, color='k', label=r'$|\gamma|$=0.7')
    panel_ax.axvline(x=-.7, color='k')
    if with_legend:
        panel_ax.legend(fontsize="9")
    panel_ax.axis(ymin=0, ymax=.2, xmin=-6, xmax=6)
    panel_ax.set_title(panel_title, fontsize="11")

for ax in axs.flat:
    ax.set(xlabel='Rank difference', ylabel='Relative Frequency')
    # Keep axis labels and tick labels only on the outer edge of the grid.
    ax.label_outer()

fig.savefig('rank_difference_distribution.png', dpi=fig.dpi, bbox_inches="tight")

## Margin=1.5

### Trained without ties

In [None]:
# Load the saved results of the model trained without ties at margin 1.5.
# glob() returns matches in arbitrary order, so the candidates are sorted to make
# the selection reproducible, and a clear error is raised when nothing matches
# (instead of a bare IndexError from `[0]`).
matches = sorted(glob('../outputs/saved/_noties-margin-1.5*.pkl'))
if not matches:
    raise FileNotFoundError("No results file matches '../outputs/saved/_noties-margin-1.5*.pkl'")
model_result = matches[0]
print('Loading model:', model_result)
# Note: this rebinds df_noties, replacing the margin-0.7 data loaded earlier.
df_noties = pd.read_pickle(model_result)
# Signed rank difference (left minus right); no absolute value is taken.
df_noties['rank_diff'] = df_noties.rank_left - df_noties.rank_right

In [None]:
# Density histogram of the signed rank difference over all pairs
# (model trained without ties, margin 1.5).
fig = plt.figure(figsize=(3,1.5))
plt.hist(df_noties.rank_diff, 25, density=1)
# The data loaded in this section is the margin-1.5 model, so the marker and the
# title must use 1.5 (they were left at 0.7 from the copied margin-0.7 cell).
plt.axvline(x=1.5, color = 'k', label=r'$\gamma$')
plt.title(r'Rank difference density ($\gamma$=1.5). All data')
plt.show()

In [None]:
# Density histograms of the signed rank difference for tied vs. non-tied pairs
# (model trained without ties, margin 1.5).
fig = plt.figure(figsize=(3,1.5))
plt.hist(df_noties[df_noties.label_r == 0].rank_diff, 25, density=1, color='green', alpha=1, label='Ties')
plt.hist(df_noties[df_noties.label_r != 0].rank_diff, 25, density=1, color='red', alpha=0.7, label='Non-ties')
# The lines sit at +/-1.5, so the legend must say 1.5 (it said 0.7); only one of
# the two lines is labelled to avoid a duplicated legend entry.
plt.axvline(x=1.5, color='k', label=r'$|\gamma|$=1.5')
plt.axvline(x=-1.5, color='k')
plt.legend(fontsize="9")
# rank_diff is signed (left minus right).
plt.xlabel('Rank difference')
plt.ylabel('Density')
plt.show()

### Trained with ties

In [None]:
# Load the saved results of the model trained with ties at margin 1.5.
# glob() returns matches in arbitrary order, so the candidates are sorted to make
# the selection reproducible, and a clear error is raised when nothing matches
# (instead of a bare IndexError from `[0]`).
matches = sorted(glob('../outputs/saved/margin-1.5*.pkl'))
if not matches:
    raise FileNotFoundError("No results file matches '../outputs/saved/margin-1.5*.pkl'")
model_result = matches[0]
print('Loading model:', model_result)
# Note: this rebinds df_ties, replacing the margin-0.7 data loaded earlier.
df_ties = pd.read_pickle(model_result)
# Signed rank difference (left minus right); no absolute value is taken.
df_ties['rank_diff'] = df_ties.rank_left - df_ties.rank_right

In [None]:
# Density histogram of the signed rank difference over all pairs
# (model trained with ties, margin 1.5).
fig = plt.figure(figsize=(3,1.5))
plt.hist(df_ties.rank_diff, 25, density=1)
# The data loaded in this section is the margin-1.5 model, so the marker and the
# title must use 1.5 (they were left at 0.7 from the copied margin-0.7 cell).
plt.axvline(x=1.5, color='k', label=r'$\gamma$')
plt.title(r'Rank difference density ($\gamma$=1.5). All data')
plt.show()

In [None]:
# Density histograms of the signed rank difference for tied vs. non-tied pairs
# (model trained with ties, margin 1.5).
fig = plt.figure(figsize=(3,1.5))
plt.hist(df_ties[df_ties.label_r == 0].rank_diff, 25, density=1, color='green', alpha=1, label='Ties')
plt.hist(df_ties[df_ties.label_r != 0].rank_diff, 25, density=1, color='red', alpha=0.7, label='Non-ties')
# The lines sit at +/-1.5, so the legend must say 1.5 (it said 0.7); only one of
# the two lines is labelled to avoid a duplicated legend entry.
plt.axvline(x=1.5, color='k', label=r'$|\gamma|$=1.5')
plt.axvline(x=-1.5, color='k')
plt.legend(fontsize="9")
# rank_diff is signed (left minus right), so "Absolute" was a mislabel.
plt.xlabel('Rank difference')
plt.ylabel('Density')
plt.show()

### Comparison ties vs. non-ties

In [None]:
# Density histograms of the signed rank difference, split by pair label, for the
# margin-1.5 models trained without ties (top) and with ties (bottom).
fig, axs = plt.subplots(2, 1, figsize=(5,3.5))

# The two non-tie groups get distinct legend texts (both used to read 'Non-ties').
axs[0].hist(df_noties[df_noties.label_r == 0].rank_diff, 25, density=1, color='green', alpha=1, label='Ties')
axs[0].hist(df_noties[df_noties.label_r > 0].rank_diff, 25, density=1, color='red', alpha=0.6, label='Non-ties (label_r > 0)')
axs[0].hist(df_noties[df_noties.label_r < 0].rank_diff, 25, density=1, color='orange', alpha=0.6, label='Non-ties (label_r < 0)')
# Lines are drawn at +/-1.5, so the legend must say 1.5 (it said 0.7); only one
# line is labelled to avoid a duplicated legend entry.
axs[0].axvline(x=1.5, color='k', label=r'$|\gamma|$=1.5')
axs[0].axvline(x=-1.5, color='k')
axs[0].legend(fontsize="9")
axs[0].axis(ymin=0, ymax=.57, xmin=-10, xmax=10)
axs[0].set_title('Trained w/o Ties', fontsize="11")

axs[1].hist(df_ties[df_ties.label_r == 0].rank_diff, 25, density=1, color='green', alpha=1, label='Ties')
axs[1].hist(df_ties[df_ties.label_r > 0].rank_diff, 25, density=1, color='red', alpha=0.6, label='Non-ties (label_r > 0)')
axs[1].hist(df_ties[df_ties.label_r < 0].rank_diff, 25, density=1, color='orange', alpha=0.6, label='Non-ties (label_r < 0)')
axs[1].axvline(x=1.5, color='k', label=r'$|\gamma|$=1.5')
axs[1].axvline(x=-1.5, color='k')
axs[1].legend(fontsize="9")
axs[1].axis(ymin=0, ymax=.57, xmin=-10, xmax=10)
axs[1].set_title('Trained w/ Ties', fontsize="11")

# rank_diff is signed (left minus right), so "Absolute" was a mislabel.
for ax in axs.flat:
    ax.set(xlabel='Rank difference', ylabel='Density')

# Keep axis labels and tick labels only on the outer edge of the grid.
for ax in axs.flat:
    ax.label_outer()

# Saved under a margin-specific name: the previous name was the same as the
# margin-0.7 figure's and silently overwrote it on a full run.
fig.savefig('rank_difference_distribution_margin-1.5.png', dpi=fig.dpi, bbox_inches="tight")

# Rank scores: models trained with ties vs. without ties

In [None]:
# Evaluate the two margin-0.7 models (trained without ties / with ties) on a
# held-out split and collect one row of metrics per model.
compiled_results = []
for model_result in ['../outputs/saved/_noties-margin-0.7.pt_results.pkl', '../outputs/saved/margin-0.7.pt_results.pkl']:
    df = pd.read_pickle(model_result)
    margin_ = 0.7
    seed = 30
    print('Margin:', margin_, '-->', model_result)
    # `ties` is True when the model was trained WITH ties. Result files of the
    # no-ties model carry 'noties' in their name, so the flag is the negation of
    # that test (the previous code had the two cases swapped).
    ties = 'noties' not in model_result

    # 80/20 split, then a further 13% of the training part as validation.
    # Only X_test is used below.
    # NOTE(review): presumably mirrors the split used at training time -- confirm.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # Ranking sub-network
    nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy = compute_ranking_accuracy(X_test, margin=margin_)

    # Ranking sub-network, without any margin on accuracy
    nontie_left_accuracy_nomargin, nontie_right_accuracy_nomargin, nontie_accuracy_nomargin = compute_ranking_accuracy_nomargin(X_test)

    # Classification sub-network
    c_all_accuracy, c_tie_accuracy, c_nontie_accuracy = compute_classification_accuracy(X_test)

    # Rank difference
    avg_dist_nonties, avg_dist_ties = compute_ranking_distance(X_test)

    # Compile results
    result = {
        'ties': ties,
        'margin': margin_,
        'seed': seed,
        # Ranking, with margins
        'ranking_acc': overall_accuracy,
        'ranking_acc_nonties': nontie_accuracy,
        'ranking_acc_ties': tie_accuracy,
        'ranking_acc_left': nontie_left_accuracy,
        'ranking_acc_right': nontie_right_accuracy,
        # Ranking, without margins
        'ranking_acc_nonties_nomargin': nontie_accuracy_nomargin,
        'ranking_acc_left_nomargin': nontie_left_accuracy_nomargin,
        'ranking_acc_right_nomargin': nontie_right_accuracy_nomargin,
        # Classification
        'classification_acc': c_all_accuracy,
        'classification_acc_nonties': c_nontie_accuracy,
        'classification_acc_ties': c_tie_accuracy,
        # Rank difference
        'avg_dist_nonties': avg_dist_nonties,
        'avg_dist_ties': avg_dist_ties,
    }
    compiled_results.append(result)

results_df = pd.DataFrame(compiled_results)

In [None]:
# Display the compiled metrics, one row per evaluated results file.
results_df

In [None]:
# Collect one predicted rank per image for each of the two margin-0.7 models
# (trained without ties / with ties); one DataFrame per model is appended.
compiled_results = []
for model_result in ['../outputs/saved/_noties-margin-0.7.pt_results.pkl', '../outputs/saved/margin-0.7.pt_results.pkl']:
    df = pd.read_pickle(model_result)
    margin_ = 0.7
    print('Margin:', margin_, '-->', model_result)
    # `ties` is True when the model was trained WITH ties. Result files of the
    # no-ties model carry 'noties' in their name, so the flag is the negation of
    # that test (the previous code had the two cases swapped).
    ties = 'noties' not in model_result

    # Rank of the first row in which each image appears on the left / right side.
    # drop_duplicates keeps the first occurrence in order of appearance, which is
    # what the previous per-image filter + iloc[0] loop computed, without
    # rescanning the whole frame once per image.
    left_ranks = (
        df.drop_duplicates(subset='image_left')[['image_left', 'rank_left']]
          .rename(columns={'image_left': 'image', 'rank_left': 'rank'})
    )
    right_ranks = (
        df.drop_duplicates(subset='image_right')[['image_right', 'rank_right']]
          .rename(columns={'image_right': 'image', 'rank_right': 'rank'})
    )

    # An image seen on both sides with the same rank collapses to a single row.
    image_ranks = (
        pd.concat([left_ranks, right_ranks], ignore_index=True)
          .assign(ties=ties)
          .drop_duplicates()
    )

    compiled_results.append(image_ranks)

In [None]:
# Display the per-image ranks of the last results file processed by the loop above.
image_ranks

In [None]:
# Stack the per-model image-rank tables into one frame. The row index of each
# table is kept as-is (ignore_index is not set), so index labels may repeat.
all_df = pd.concat(compiled_results)

In [None]:
# Absolute change in rank between consecutive rows of the same image (NaN on the
# first row of each image), and that change relative to the row's own rank.
rank_delta = all_df.groupby('image')['rank'].diff().abs()
all_df['diff'] = rank_delta
all_df['relative_change'] = (rank_delta / all_df['rank']).abs()

In [None]:
# Rows for which a rank difference could be computed.
all_df.dropna(subset=['diff'])

In [None]:
# Histogram of the absolute per-image rank difference (rows without a difference
# are left out).
plt.figure(figsize=(5, 3))
rank_diffs = all_df['diff'].dropna()
plt.hist(rank_diffs, bins=50, color='green', alpha=1, label='Ties')
plt.xlabel('Absolute difference')
plt.ylabel('# of images')
plt.show()

In [None]:
# Density of the per-image ranks, one step histogram per value of the `ties` flag.
plt.figure(figsize=(5,3))

# The return value of plt.hist is not needed; it was previously unpacked into
# `n, x, _`, which rebinds `_` and disables IPython's last-output shortcut.
plt.hist(all_df[all_df['ties'] == True]['rank'], 50, density=True, color='red', alpha=1, histtype='step', label='With Ties')
plt.hist(all_df[all_df['ties'] == False]['rank'], 50, density=True, color='blue', alpha=1, histtype='step', label='Without ties')
plt.legend()
plt.xlabel('Image rank distribution')
plt.ylabel('Density ')
plt.show()

In [None]:
# Summary statistics of the relative rank change, over rows that have a difference.
all_df.loc[all_df['diff'].notna(), 'relative_change'].describe()

# Rank distribution per margin

In [None]:
# Collect one predicted rank per image for every results file matching
# 'margin-*.pkl'; one DataFrame per file is appended.
compiled_results = []
for model_result in glob('../outputs/saved/margin-*.pkl'):
    df = pd.read_pickle(model_result)
    # The margin is encoded in the file name: 'margin-<value>.pt_results.pkl'.
    margin_ = float(os.path.basename(model_result).replace('margin-', '').replace('.pt_results.pkl', ''))
    print('Margin:', margin_, '-->', model_result)
    # `ties` is True when the model was trained WITH ties. Result files of
    # no-ties models carry 'noties' in their name, so the flag is the negation of
    # that test (the previous code had the two cases swapped).
    ties = 'noties' not in model_result

    # Rank of the first row in which each image appears on the left / right side.
    # drop_duplicates keeps the first occurrence in order of appearance, which is
    # what the previous per-image filter + iloc[0] loop computed, without
    # rescanning the whole frame once per image.
    left_ranks = (
        df.drop_duplicates(subset='image_left')[['image_left', 'rank_left']]
          .rename(columns={'image_left': 'image', 'rank_left': 'rank'})
    )
    right_ranks = (
        df.drop_duplicates(subset='image_right')[['image_right', 'rank_right']]
          .rename(columns={'image_right': 'image', 'rank_right': 'rank'})
    )

    # An image seen on both sides with the same rank collapses to a single row.
    image_ranks = (
        pd.concat([left_ranks, right_ranks], ignore_index=True)
          .assign(ties=ties, margin=margin_)
          .drop_duplicates()
    )

    compiled_results.append(image_ranks)

In [None]:
# Stack the per-margin image-rank tables into one frame (this rebinds all_df).
# The row index of each table is kept as-is, so index labels may repeat.
all_df = pd.concat(compiled_results)

In [None]:
# Density of the per-image ranks, one step histogram per training margin.
plt.figure(figsize=(10,8))
# Iterate over the margins actually present in the data (ascending) rather than
# a hard-coded list: a listed margin with no rows would yield an empty histogram,
# and a margin missing from the list would be silently left out.
for margin in sorted(all_df['margin'].unique()):
    # The return value of plt.hist is not needed; unpacking it into `n, x, _`
    # rebinds `_` and disables IPython's last-output shortcut.
    plt.hist(all_df[all_df['margin'] == margin]['rank'], 50, density=True, alpha=1, histtype='step', label='{}'.format(float(margin)))
plt.legend()
plt.xlabel('Image rank distribution')
plt.ylabel('Density ')
plt.xlim(-5, 5)
plt.show()

# Different Tie & Non-tie margins

In [None]:
# List the results files matched by the two patterns that the next cell iterates over.
glob('../outputs/saved/ties_margin_*.pkl') + glob('../outputs/saved/margin-0.7*.pkl')

In [None]:
# Evaluate the runs trained with separate tie / non-tie margins (plus the
# margin-0.7 baseline) on a held-out split, one row of metrics per results file.

# (margin_nonties, margin_ties) of each run, keyed by a substring that identifies
# its results file. Entries are tested in this order and the first match wins.
run_margins = {
    'colorful-firefly-666': (0.7, 1.4),
    'spring-plasma-667':    (0.7, 1.0),
    'daily-sky-671':        (1.0, 0.7),
    'vibrant-pond-670':     (1.4, 0.7),
    'margin-0.7':           (0.7, 0.7),
}

compiled_results = []
for model_result in glob('../outputs/saved/ties_margin_*.pkl') + glob('../outputs/saved/margin-0.7*.pkl'):
    df = pd.read_pickle(model_result)
    seed = 30

    # Fail loudly on an unrecognised file. The previous if/elif chain had no
    # fallback, so such a file either raised NameError or silently reused the
    # margins of the previous iteration.
    run_key = next((key for key in run_margins if key in model_result), None)
    if run_key is None:
        raise ValueError('No tie/non-tie margins known for results file: {}'.format(model_result))
    margin_nonties, margin_ties = run_margins[run_key]

    print('Margin Nonties:', margin_nonties, ' / Margin Ties:', margin_ties, '-->', model_result)

    # 80/20 split, then a further 13% of the training part as validation.
    # Only X_test is used below.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # Ranking sub-network
    # NOTE(review): compute_ranking_accuracy applies its single `margin` to both
    # the tie and the non-tie checks, so non-tie accuracy is evaluated with
    # margin_ties here even when margin_nonties differs -- confirm this is intended.
    nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy = compute_ranking_accuracy(X_test, margin=margin_ties)

    # Ranking sub-network, without any margin on accuracy
    nontie_left_accuracy_nomargin, nontie_right_accuracy_nomargin, nontie_accuracy_nomargin = compute_ranking_accuracy_nomargin(X_test)

    # Classification sub-network
    c_all_accuracy, c_tie_accuracy, c_nontie_accuracy = compute_classification_accuracy(X_test)

    # Rank difference
    avg_dist_nonties, avg_dist_ties = compute_ranking_distance(X_test)
    sum_dist_nonties, sum_dist_ties = compute_ranking_distance_sum(X_test)

    # Compile results
    result = {
        'margin_ties': margin_ties,
        'margin_nonties': margin_nonties,
        'seed': seed,
        # Ranking, with margins
        'ranking_acc': overall_accuracy,
        'ranking_acc_nonties': nontie_accuracy,
        'ranking_acc_ties': tie_accuracy,
        # Ranking, without margins
        'ranking_acc_nonties_nomargin': nontie_accuracy_nomargin,
        # Classification
        'classification_acc': c_all_accuracy,
        'classification_acc_nonties': c_nontie_accuracy,
        'classification_acc_ties': c_tie_accuracy,
        # Rank difference
        'avg_dist_nonties': avg_dist_nonties,
        'sum_dist_nonties': sum_dist_nonties,
        'avg_dist_ties': avg_dist_ties,
        'sum_dist_ties': sum_dist_ties,
    }
    compiled_results.append(result)

results_df = pd.DataFrame(compiled_results).sort_values(by=['margin_ties', 'margin_nonties'])




In [None]:
# Display the compiled metrics, sorted by margin_ties and then margin_nonties.
results_df