In [1]:
import pandas as pd
import os

In [2]:
def get_rank(dataset):
    """Rank the methods of one dataset by mean loss and by loss spread.

    Reads ``loss_csvs/{dataset}.csv`` (which must provide ``method`` and
    ``loss`` columns), drops the excluded AFT-XGBoost variants, and ranks the
    remaining methods twice, with 1 being the best rank in both cases:

    * ``ranking_performance`` -- by mean loss (lower is better);
    * ``ranking_consistency`` -- by standard deviation of the loss
      (lower is better).

    Ties share the lowest rank of the tied group (``method="min"``).

    Parameters
    ----------
    dataset : str
        Dataset name; used to build the CSV path.

    Returns
    -------
    pandas.DataFrame
        Columns ``method``, ``ranking_performance``, ``ranking_consistency``,
        with rows in the fixed order constant, knn, linear, mmit, rf,
        aft_xgboost_original, mlp. Methods outside that list keep their label
        and are placed after the listed ones.

    Notes
    -----
    A method with a single loss row has an undefined (NaN) standard
    deviation, and the integer cast of its rank will raise.
    """
    excluded_methods = ["aft_xgboost", "aft_xgboost_extreme", "aft_xgboost_sigmoid"]
    method_order = ['constant', 'knn', 'linear', 'mmit', 'rf', 'aft_xgboost_original', 'mlp']

    # Load the per-run losses and drop the excluded methods *before* ranking,
    # so they do not take up rank positions.
    df = pd.read_csv(f'loss_csvs/{dataset}.csv')
    df = df[~df["method"].isin(excluded_methods)]

    # Mean and std of the loss for every remaining method
    summary_df = df.groupby("method")["loss"].agg(mean_loss="mean", std_loss="std").reset_index()

    # Assign rankings (1 is the best)
    summary_df["ranking_performance"] = summary_df["mean_loss"].rank(method="min", ascending=True).astype(int)
    summary_df["ranking_consistency"] = summary_df["std_loss"].rank(method="min", ascending=True).astype(int)

    # Work on an explicit copy rather than on a slice of summary_df.
    rank_df = summary_df[['method', 'ranking_performance', 'ranking_consistency']].copy()

    # Order rows by position in method_order. Assigning a Categorical through
    # `.loc[:, 'method']` is written in place by recent pandas and leaves the
    # column as plain strings, so the sort would silently fall back to
    # alphabetical order; an explicit sort key does not depend on the dtype.
    order_position = {name: pos for pos, name in enumerate(method_order)}
    rank_df = rank_df.sort_values(
        by='method',
        key=lambda names: names.map(order_position).fillna(len(method_order)),
        kind='mergesort',  # stable: unlisted methods keep their relative order
    )
    return rank_df

In [3]:
def get_all_ranks(datasets):
    """Collect the per-dataset rankings into two dataset-by-method tables.

    Calls ``get_rank`` once for every name in ``datasets`` and returns a pair
    ``(performance_rank_df, consistency_rank_df)``. In both frames each row is
    a dataset and each column is a method label.
    """
    performance_by_dataset, consistency_by_dataset = {}, {}

    for name in datasets:
        # Key the rankings by method label so the columns align across datasets.
        ranks_by_method = get_rank(name).set_index("method")
        performance_by_dataset[name] = ranks_by_method['ranking_performance']
        consistency_by_dataset[name] = ranks_by_method['ranking_consistency']

    # The dict keys (datasets) become columns; transpose to get them as rows.
    return (
        pd.DataFrame(performance_by_dataset).T,
        pd.DataFrame(consistency_by_dataset).T,
    )

In [4]:
# Every sub-directory of `data` is treated as one dataset. os.listdir returns
# entries in arbitrary order, so sort the names to make the row order of both
# rank tables reproducible across machines and runs.
datasets = sorted(
    name for name in os.listdir('data') if os.path.isdir(os.path.join('data', name))
)
performance_rank_df, consistency_rank_df = get_all_ranks(datasets)

# Column order of the tables (labels as they appear in the loss CSVs) and the
# shorter labels they are displayed under.
method_columns = ['constant', 'knn', 'linear', 'mmit', 'rf', 'aft_xgboost_original', 'mlp']
display_names = {'rf': 'mmif', 'aft_xgboost_original': 'aft_xgb'}

performance_rank_df = performance_rank_df[method_columns].rename(columns=display_names)
consistency_rank_df = consistency_rank_df[method_columns].rename(columns=display_names)

In [5]:
# Display the performance ranks (rows: datasets, columns: methods; 1 is best).
performance_rank_df

method,constant,knn,linear,mmit,mmif,aft_xgb,mlp
auto93,7,6,3,5,1,2,4
autohorse,7,4,2,5,1,6,3
autompg,7,6,2,3,4,5,1
autoprice,7,4,6,2,1,5,3
baskball,6,2,5,3,1,7,4
bodyfat,7,6,2,4,3,5,1
breasttumor,6,1,4,5,2,7,3
cholesterol,3,5,4,6,1,7,2
cleveland,6,4,2,5,3,7,1
cloud,7,6,3,2,5,4,1


In [12]:
# Print the performance-rank table as LaTeX source. escape=True so that LaTeX
# special characters in the labels (e.g. the underscore in `aft_xgb`) are
# escaped and the emitted table compiles without manual editing.
print(performance_rank_df.to_latex(index=True, header=True, escape=True))

\begin{tabular}{lrrrrrrr}
\toprule
method & constant & knn & linear & mmit & mmif & aft_xgb & mlp \\
\midrule
auto93 & 7 & 6 & 3 & 5 & 1 & 2 & 4 \\
autohorse & 7 & 4 & 2 & 5 & 1 & 6 & 3 \\
autompg & 7 & 6 & 2 & 3 & 4 & 5 & 1 \\
autoprice & 7 & 4 & 6 & 2 & 1 & 5 & 3 \\
baskball & 6 & 2 & 5 & 3 & 1 & 7 & 4 \\
bodyfat & 7 & 6 & 2 & 4 & 3 & 5 & 1 \\
breasttumor & 6 & 1 & 4 & 5 & 2 & 7 & 3 \\
cholesterol & 3 & 5 & 4 & 6 & 1 & 7 & 2 \\
cleveland & 6 & 4 & 2 & 5 & 3 & 7 & 1 \\
cloud & 7 & 6 & 3 & 2 & 5 & 4 & 1 \\
cpu & 7 & 6 & 4 & 3 & 1 & 2 & 5 \\
echomonths & 7 & 4 & 2 & 5 & 3 & 6 & 1 \\
elusage & 7 & 2 & 1 & 3 & 4 & 5 & 6 \\
fishcatch & 7 & 6 & 4 & 5 & 3 & 2 & 1 \\
fruitfly & 1 & 5 & 2 & 6 & 3 & 7 & 4 \\
housing & 7 & 3 & 6 & 5 & 4 & 2 & 1 \\
lowbwt & 6 & 5 & 3 & 2 & 1 & 7 & 4 \\
lymphoma.mkatayama & 1 & 5 & 6 & 3 & 4 & 7 & 2 \\
lymphoma.tdh & 3 & 6 & 2 & 4 & 1 & 7 & 5 \\
machine.cpu & 7 & 6 & 5 & 3 & 2 & 1 & 4 \\
mbagrade & 3 & 6 & 5 & 1 & 2 & 4 & 7 \\
meta & 5 & 1 & 2 & 6 & 4 & 7 & 3 \\
p

In [6]:
# Display the consistency ranks (rows: datasets, columns: methods; 1 is best).
consistency_rank_df

method,constant,knn,linear,mmit,mmif,aft_xgb,mlp
auto93,7,6,4,2,1,3,5
autohorse,7,4,2,5,1,6,3
autompg,7,5,1,4,2,6,3
autoprice,7,3,4,2,1,6,5
baskball,5,4,3,2,1,6,7
bodyfat,7,5,1,4,3,6,2
breasttumor,6,1,2,5,3,7,4
cholesterol,5,6,4,7,1,3,2
cleveland,6,5,1,4,3,7,2
cloud,7,6,3,2,5,4,1


In [13]:
# Print the consistency-rank table as LaTeX source. escape=True so that LaTeX
# special characters in the labels (e.g. the underscore in `aft_xgb`) are
# escaped and the emitted table compiles without manual editing.
print(consistency_rank_df.to_latex(index=True, header=True, escape=True))

\begin{tabular}{lrrrrrrr}
\toprule
method & constant & knn & linear & mmit & mmif & aft_xgb & mlp \\
\midrule
auto93 & 7 & 6 & 4 & 2 & 1 & 3 & 5 \\
autohorse & 7 & 4 & 2 & 5 & 1 & 6 & 3 \\
autompg & 7 & 5 & 1 & 4 & 2 & 6 & 3 \\
autoprice & 7 & 3 & 4 & 2 & 1 & 6 & 5 \\
baskball & 5 & 4 & 3 & 2 & 1 & 6 & 7 \\
bodyfat & 7 & 5 & 1 & 4 & 3 & 6 & 2 \\
breasttumor & 6 & 1 & 2 & 5 & 3 & 7 & 4 \\
cholesterol & 5 & 6 & 4 & 7 & 1 & 3 & 2 \\
cleveland & 6 & 5 & 1 & 4 & 3 & 7 & 2 \\
cloud & 7 & 6 & 3 & 2 & 5 & 4 & 1 \\
cpu & 7 & 4 & 5 & 2 & 1 & 3 & 6 \\
echomonths & 7 & 2 & 5 & 4 & 3 & 6 & 1 \\
elusage & 7 & 4 & 1 & 2 & 3 & 5 & 6 \\
fishcatch & 7 & 6 & 2 & 5 & 4 & 3 & 1 \\
fruitfly & 3 & 5 & 2 & 7 & 1 & 6 & 4 \\
housing & 7 & 3 & 4 & 5 & 2 & 6 & 1 \\
lowbwt & 6 & 4 & 3 & 1 & 2 & 7 & 5 \\
lymphoma.mkatayama & 1 & 6 & 5 & 2 & 3 & 7 & 4 \\
lymphoma.tdh & 3 & 6 & 4 & 2 & 1 & 7 & 5 \\
machine.cpu & 7 & 6 & 5 & 2 & 3 & 1 & 4 \\
mbagrade & 6 & 2 & 5 & 1 & 3 & 4 & 7 \\
meta & 6 & 1 & 2 & 5 & 4 & 7 & 3 \\
p

In [7]:
# Tally, for every method, on how many datasets it obtained each performance rank.
# Relies on performance_rank_df (one column per method) from the cells above.
methods = ['constant', 'knn', 'linear', 'mmit', 'mmif', 'aft_xgb', 'mlp']
counts_dict = {name: performance_rank_df[name].value_counts() for name in methods}

# Rows = methods, columns = rank values. A rank that a method never obtained is
# missing from its value_counts, so it shows up as NaN and is filled with 0
# before the cast to int. Selecting with `methods` pins the row order to that list.
performance_counts_df = (
    pd.DataFrame(counts_dict)
    .T
    .fillna(0)
    .astype(int)
    .loc[methods]
)

# Display the resulting table
performance_counts_df

Unnamed: 0,1,2,3,4,5,6,7
constant,2,2,3,1,4,7,20
knn,4,4,2,6,6,16,1
linear,3,11,6,8,5,5,1
mmit,2,10,9,3,9,4,2
mmif,15,5,8,7,4,0,0
aft_xgb,4,4,3,4,7,5,12
mlp,9,3,8,10,4,2,3


In [8]:
# Print the performance rank-count table as LaTeX source. escape=True so that
# LaTeX special characters in the row labels (e.g. the underscore in `aft_xgb`)
# are escaped and the emitted table compiles without manual editing.
print(performance_counts_df.to_latex(index=True, header=True, escape=True))

\begin{tabular}{lrrrrrrr}
\toprule
 & 1 & 2 & 3 & 4 & 5 & 6 & 7 \\
\midrule
constant & 2 & 2 & 3 & 1 & 4 & 7 & 20 \\
knn & 4 & 4 & 2 & 6 & 6 & 16 & 1 \\
linear & 3 & 11 & 6 & 8 & 5 & 5 & 1 \\
mmit & 2 & 10 & 9 & 3 & 9 & 4 & 2 \\
mmif & 15 & 5 & 8 & 7 & 4 & 0 & 0 \\
aft_xgb & 4 & 4 & 3 & 4 & 7 & 5 & 12 \\
mlp & 9 & 3 & 8 & 10 & 4 & 2 & 3 \\
\bottomrule
\end{tabular}



In [9]:
# Tally, for every method, on how many datasets it obtained each consistency rank.
# Relies on consistency_rank_df (one column per method) from the cells above.
methods = ['constant', 'knn', 'linear', 'mmit', 'mmif', 'aft_xgb', 'mlp']
counts_dict = {name: consistency_rank_df[name].value_counts() for name in methods}

# Rows = methods, columns = rank values. A rank that a method never obtained is
# missing from its value_counts, so it shows up as NaN and is filled with 0
# before the cast to int. Selecting with `methods` pins the row order to that list.
consistency_counts_df = (
    pd.DataFrame(counts_dict)
    .T
    .fillna(0)
    .astype(int)
    .loc[methods]
)

# Display the resulting table
consistency_counts_df

Unnamed: 0,1,2,3,4,5,6,7
constant,4,0,2,3,3,8,19
knn,4,3,3,7,7,12,3
linear,7,7,8,7,7,3,0
mmit,2,13,4,8,8,2,2
mmif,12,9,9,4,3,2,0
aft_xgb,4,1,7,2,3,10,12
mlp,6,6,6,8,8,2,3


In [10]:
# Print the consistency rank-count table as LaTeX source. escape=True so that
# LaTeX special characters in the row labels (e.g. the underscore in `aft_xgb`)
# are escaped and the emitted table compiles without manual editing.
print(consistency_counts_df.to_latex(index=True, header=True, escape=True))

\begin{tabular}{lrrrrrrr}
\toprule
 & 1 & 2 & 3 & 4 & 5 & 6 & 7 \\
\midrule
constant & 4 & 0 & 2 & 3 & 3 & 8 & 19 \\
knn & 4 & 3 & 3 & 7 & 7 & 12 & 3 \\
linear & 7 & 7 & 8 & 7 & 7 & 3 & 0 \\
mmit & 2 & 13 & 4 & 8 & 8 & 2 & 2 \\
mmif & 12 & 9 & 9 & 4 & 3 & 2 & 0 \\
aft_xgb & 4 & 1 & 7 & 2 & 3 & 10 & 12 \\
mlp & 6 & 6 & 6 & 8 & 8 & 2 & 3 \\
\bottomrule
\end{tabular}

