In [None]:
import numpy as np
import pandas as pd
from glob import glob
import os
import pickle
import re
from sklearn.model_selection import train_test_split

In [None]:
import matplotlib.pyplot as plt

In [None]:
from matplotlib import rc
# Default figure font: serif family, Times New Roman, size 12.
rc('font', family='serif', serif=['Times New Roman'], size=12)

In [None]:
def compute_ranking_accuracy(df_, margin=0):
    """Score pairwise rank predictions against their labels, using a margin.

    A row is counted as correct when:

    * ``label_r == -1`` and ``rank_left - rank_right > margin``
    * ``label_r ==  1`` and ``rank_right - rank_left > margin``
    * ``label_r ==  0`` (tie) and ``abs(rank_left - rank_right) < margin``

    Note that with the default ``margin=0`` no tie can ever be counted as
    correct, because an absolute difference is never below zero.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must provide the columns ``label_r``, ``rank_left`` and ``rank_right``.
    margin : float, default 0
        Threshold applied to the rank difference as described above.

    Returns
    -------
    tuple
        ``(nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy,
        tie_accuracy, overall_accuracy)``. An entry is NaN when the subset it
        is computed over is empty.
    """
    def _fraction(hits, total):
        # An empty subset has no defined accuracy; report NaN rather than
        # dividing by zero.
        return hits / total if total else float('nan')

    labels = df_.label_r
    left_minus_right = df_.rank_left - df_.rank_right
    right_minus_left = df_.rank_right - df_.rank_left

    is_left = labels == -1
    is_right = labels == 1
    is_tie = labels == 0

    correct_left = (is_left & (left_minus_right > margin)).sum()
    correct_right = (is_right & (right_minus_left > margin)).sum()
    correct_tie = (is_tie & (left_minus_right.abs() < margin)).sum()

    # Non-ties are every row whose label is not 0 (same subset as before).
    n_nonties = (labels != 0).sum()

    nontie_left_accuracy = _fraction(correct_left, is_left.sum())
    nontie_right_accuracy = _fraction(correct_right, is_right.sum())
    nontie_accuracy = _fraction(correct_left + correct_right, n_nonties)
    tie_accuracy = _fraction(correct_tie, is_tie.sum())

    # Overall accuracy is computed from the frame that was passed in. The
    # three label cases are mutually exclusive, so their hit counts add up.
    overall_accuracy = _fraction(correct_left + correct_right + correct_tie, df_.shape[0])

    return nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy

In [None]:
def compute_ranking_accuracy_nomargin(df_,):
    """Ranking accuracy on non-tied pairs, judged by the sign of the rank gap.

    Rows with ``label_r == 0`` are dropped. A ``label_r == -1`` row is correct
    when ``rank_left - rank_right > 0``; a ``label_r == 1`` row is correct when
    ``rank_right - rank_left > 0``.

    Returns
    -------
    tuple
        ``(nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy)``.
    """
    # Keep only the pairs that are not labelled as ties.
    nonties = df_[df_.label_r != 0]

    labelled_left = nonties.label_r == -1
    labelled_right = nonties.label_r == 1

    left_gap = nonties.rank_left - nonties.rank_right
    right_gap = nonties.rank_right - nonties.rank_left

    hits_left = (labelled_left & (left_gap > 0)).sum()
    hits_right = (labelled_right & (right_gap > 0)).sum()

    return (
        hits_left / labelled_left.sum(),
        hits_right / labelled_right.sum(),
        (hits_left + hits_right) / nonties.shape[0],
    )

In [None]:
def compute_ranking_distance(df_):
    """Mean absolute gap between ``rank_left`` and ``rank_right``.

    Returns
    -------
    tuple
        ``(avg_dist_nonties, avg_dist_ties)``: the mean gap over rows with
        ``label_r != 0`` and over rows with ``label_r == 0`` respectively.
    """
    def _mean_gap(rows):
        # Average of |rank_left - rank_right| over the given rows.
        return (rows.rank_left - rows.rank_right).abs().mean()

    nontie_rows = df_[df_.label_r != 0]
    tie_rows = df_[df_.label_r == 0]

    return _mean_gap(nontie_rows), _mean_gap(tie_rows)

In [None]:
def compute_classification_accuracy(df_):
    """Accuracy of the classification logits against ``label_c``.

    When a ``logits_0`` column is present the prediction is the argmax over
    ``(logits_l, logits_0, logits_r)``, i.e. a class index in {0, 1, 2}, and
    rows with ``label_c == 1`` are treated as ties. Without ``logits_0`` the
    prediction is the argmax over ``(logits_l, logits_r)``, i.e. 0 or 1.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Must provide ``logits_l``, ``logits_r`` and ``label_c``; ``logits_0``
        is optional.

    Returns
    -------
    tuple
        ``(all_accuracy, tie_accuracy, nontie_accuracy)``. With ``logits_0``
        present, tie and non-tie accuracies are each normalised by the size of
        their own subset (NaN when that subset is empty). Without it,
        ``tie_accuracy`` is 0 and ``nontie_accuracy`` equals ``all_accuracy``.
    """
    def _fraction(hits, total):
        # An empty subset has no defined accuracy; report NaN rather than
        # dividing by zero.
        return hits / total if total else float('nan')

    has_tie_class = 'logits_0' in df_.columns
    logit_columns = ['logits_l', 'logits_0', 'logits_r'] if has_tie_class else ['logits_l', 'logits_r']
    logits = np.column_stack([df_[name].values for name in logit_columns])

    # Row-wise softmax, shifted by the row maximum for numerical stability.
    exponentials = np.exp(logits - logits.max(axis=1, keepdims=True))
    probabilities = exponentials / exponentials.sum(axis=1, keepdims=True)

    # Compare positionally: the input index is discarded on both sides.
    predicted = pd.Series(np.argmax(probabilities, axis=1))
    labels = df_['label_c'].reset_index(drop=True)
    correct = predicted == labels

    all_accuracy = _fraction(correct.sum(), correct.shape[0])

    if has_tie_class:
        is_tie = labels == 1
        # Each subset accuracy is divided by its own size, not by the total
        # number of rows.
        tie_accuracy = _fraction(correct[is_tie].sum(), is_tie.sum())
        nontie_accuracy = _fraction(correct[~is_tie].sum(), (~is_tie).sum())
    else:
        # Without ties.
        # NOTE(review): predictions here are 0 (logits_l) or 1 (logits_r);
        # confirm label_c uses the same encoding for tie-free results.
        tie_accuracy = 0
        nontie_accuracy = all_accuracy

    return all_accuracy, tie_accuracy, nontie_accuracy

## Available results

In [None]:
# glob returns matches in filesystem-dependent order; sort so that
# model_results[0] and the processing order are reproducible.
model_results = sorted(glob('../outputs/saved/weights/weights_*.pkl'))

In [None]:
# Show the first matched result path (raises IndexError if nothing matched).
model_results[0]

## Process results

In [None]:
# Preview how a result file name is split into its two weight fields.
# Uses the first matched path so the cell runs on a fresh kernel (the loop
# variable `model_result` is only defined by a later cell).
os.path.basename(model_results[0]).replace('weights_vgg_', '').replace('.pt_results.pkl', '').split('_')

In [None]:
# Evaluation settings shared by every result file; defined once because they
# do not change between iterations.
seed = 30
margin_ = 0.7

compiled_results = []
for model_result in model_results:
    # NOTE(review): unpickling can execute arbitrary code -- only load result
    # files that were produced by a trusted run.
    df = pd.read_pickle(model_result)

    # Recover the two weights from the file name by stripping the
    # 'weights_vgg_' prefix and the '.pt_results.pkl' suffix, then splitting
    # on '_'. Exactly two numeric fields are expected.
    weights_tag = os.path.basename(model_result).replace('weights_vgg_', '').replace('.pt_results.pkl', '')
    rank_w, tie_w = weights_tag.split('_')
    rank_w, tie_w = float(rank_w), float(tie_w)
    print('Weights:', rank_w, ' / ', tie_w, '-->', model_result)

    # Seeded splits; only X_test is scored below.
    # NOTE(review): presumably mirrors the split used at training time -- confirm.
    X_train, X_test = train_test_split(df, test_size=0.2, random_state=seed)
    X_train, X_val  = train_test_split(X_train, test_size=0.13, random_state=seed)

    # Ranking sub-network
    nontie_left_accuracy, nontie_right_accuracy, nontie_accuracy, tie_accuracy, overall_accuracy = compute_ranking_accuracy(X_test, margin=margin_)

    # Ranking sub-network, without any margin on accuracy
    nontie_left_accuracy_nomargin, nontie_right_accuracy_nomargin, nontie_accuracy_nomargin = compute_ranking_accuracy_nomargin(X_test)

    # Classification sub-network
    c_all_accuracy, c_tie_accuracy, c_nontie_accuracy = compute_classification_accuracy(X_test)

    # Rank difference
    avg_dist_nonties, avg_dist_ties = compute_ranking_distance(X_test)

    # Compile results: one record per result file
    compiled_results.append({
        'rank_w': rank_w,
        'tie_w': tie_w,
        'seed': seed,
        # Ranking, with margins
        'ranking_acc': overall_accuracy,
        'ranking_acc_nonties': nontie_accuracy,
        'ranking_acc_ties': tie_accuracy,
        'ranking_acc_left': nontie_left_accuracy,
        'ranking_acc_right': nontie_right_accuracy,
        # Ranking, without margins
        'ranking_acc_nonties_nomargin': nontie_accuracy_nomargin,
        'ranking_acc_left_nomargin': nontie_left_accuracy_nomargin,
        'ranking_acc_right_nomargin': nontie_right_accuracy_nomargin,
        # Classification
        'classification_acc': c_all_accuracy,
        'classification_acc_nonties': c_nontie_accuracy,
        'classification_acc_ties': c_tie_accuracy,
        # Rank difference
        'avg_dist_nonties': avg_dist_nonties,
        'avg_dist_ties': avg_dist_ties,
    })

# Build the summary table in one go from the collected records.
results_df = pd.DataFrame(compiled_results)

In [None]:
# Order the summary by rank_w, then tie_w, and display it.
sort_keys = ['rank_w', 'tie_w']
results_df = results_df.sort_values(by=sort_keys)
results_df

In [None]:
# Keep only the two weight columns and the ranking-accuracy columns.
aa = results_df.loc[:, [
    'rank_w',
    'tie_w',
    'ranking_acc',
    'ranking_acc_nonties',
    'ranking_acc_ties',
    'ranking_acc_nonties_nomargin',
]]
aa

In [None]:
# Select the ipympl matplotlib backend for the figures created after this cell.
%matplotlib ipympl

In [None]:
# 3D line of the no-margin non-tie ranking accuracy against rank_w and tie_w.
fig = plt.figure()
ax = fig.add_subplot(projection='3d')
ax.plot3D(aa.rank_w, aa.tie_w, aa.ranking_acc_nonties_nomargin, 'green')
# Label the axes so the figure can be read on its own.
ax.set_xlabel('rank_w')
ax.set_ylabel('tie_w')
ax.set_zlabel('ranking_acc_nonties_nomargin')
plt.show()

In [None]:
import plotly.graph_objects as go
import numpy as np

# Tie vs. non-tie ranking accuracy for every (rank_w, tie_w) pair.
# Traces are named so the legend tells the two marker sets apart.
fig = go.Figure(data=[go.Scatter3d(x=aa.rank_w, y=aa.tie_w, z=aa.ranking_acc_ties,
                                   mode='markers', name='ranking_acc_ties'),
                      go.Scatter3d(x=aa.rank_w, y=aa.tie_w, z=aa.ranking_acc_nonties,
                                   mode='markers', name='ranking_acc_nonties')
                      ])
fig.update_layout(scene=dict(xaxis_title='rank_w',
                             yaxis_title='tie_w',
                             zaxis_title='ranking accuracy'))
fig.show()

In [None]:
import plotly.express as px
# 3D scatter of the tie ranking accuracy per (rank_w, tie_w) pair, coloured by rank_w.
# Set z='ranking_acc_nonties' to inspect the non-tie accuracy instead.
fig = px.scatter_3d(aa, x='rank_w', y='tie_w', z='ranking_acc_ties',
                    color='rank_w')
fig.show()