# Plotting Few-Shot Model Evaluation Results

Assembling plots from summary files.

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Note that "__file__" is a string literal here, not the module attribute (presumably
# because a notebook kernel does not define `__file__`). realpath() therefore resolves
# it against the current working directory, and the parent of that directory is placed
# first on sys.path so that the `plotting` package imported below can be found.
sys.path.insert(0, os.path.join(os.path.dirname(os.path.realpath("__file__")), ".."))

from plotting.utils import (
    highlight_max_all, 
    plot_all_assays, 
    load_data,
    expand_values,
    plot_task_performances_by_id,
    aggregate_by_class,
    box_plot,
    plot_by_size,
    get_aggregates_across_sizes
)

## Loading the collated evaluation data

Create a dictionary of all model summary CSV files to be compared. These CSV files are the final summaries produced by `collect_eval_runs.py`.

In [2]:
# Configure this to contain all the models that you want to look at.
# Dict keys are human-readable names; values are the paths to the summaries
# produced by collect_eval_runs.py.
data_path = "/mnt/genchemdata/preprocessed-data/fsmol"
results_path = os.path.join(data_path, "results/")
model_summaries = {
    "GNN-MAML": os.path.join(results_path, "MAML-Support16_summary.csv"),
#     "PN": os.path.join(results_path, "PN_summary.csv"),
    "GNN-MT": os.path.join(results_path, "GNN-Multitask_summary.csv"),
    "ST": os.path.join(results_path, "random_forest_summary.csv"),
#     "GNN-ST": os.path.join(results_path, "GNN_summary.csv"),
    "kNN": os.path.join(results_path, "kNN_summary.csv"),
#     "MAT": os.path.join(results_path, "MAT_summary.csv"),
}
# Generated plots will be stored here, if you want to keep them. None disables saving.
plot_output_dir = os.path.join(results_path, "plots/")
# Only create the directory when saving is enabled: os.makedirs(None) raises a
# TypeError, which would make the documented "None disables saving" setting unusable.
if plot_output_dir is not None:
    os.makedirs(plot_output_dir, exist_ok=True)

In [3]:
# Load every summary listed in `model_summaries` into `data` (used as a DataFrame in
# the cells below). The loader reports one "Loading data for ..." line per model.
data = load_data(model_summaries)

Loading data for GNN-MAML from /mnt/genchemdata/preprocessed-data/fsmol/results/MAML-Support16_summary.csv.
Loading data for GNN-MT from /mnt/genchemdata/preprocessed-data/fsmol/results/GNN-Multitask_summary.csv.
Loading data for ST from /mnt/genchemdata/preprocessed-data/fsmol/results/random_forest_summary.csv.
Loading data for kNN from /mnt/genchemdata/preprocessed-data/fsmol/results/kNN_summary.csv.


## Highlight the best result for each task

In [None]:
# Apply `highlight_max_all` row by row (axis=1) so the best result of each task is
# highlighted in the rendered table.
styled_df = data.style.apply(highlight_max_all, axis=1)

# Export the styled table as a spreadsheet. This is skipped when saving is disabled
# (plot_output_dir is None); comment the block out if you do not want the file written.
if plot_output_dir is not None:
    styled_df.to_excel(
        os.path.join(plot_output_dir, "all_model_highlighted_comparison.xlsx"),
        engine="xlsxwriter",
    )

styled_df

In [4]:
# Expand out from the "val +/- error" format, and calculate delta AUPRC.
# NOTE(review): this rebinds `data` to the expanded result, so re-running this cell
# passes already-expanded data back into expand_values. Re-run the loading cell first;
# TODO confirm whether expand_values tolerates being applied twice.
data = expand_values(data, model_summaries)

## Performance Overview over all Tasks

This compares with the trivial baseline of using a weighted coinflip according to the class imbalance in the training data.

In [None]:
# Plot the per-task performance of every configured model at support set size 16.
plot_task_performances_by_id(
    data,
    model_summaries,
    support_set_size=16,
)

### Incorporate protein information

Our test tasks have associated target protein information available. We can merge this data to allow plotting with specific EC number classes highlighted.

In [6]:
# Read the target-protein annotations for the test tasks.
protein_path = os.path.join(data_path, "targets/test_proteins.csv")
ecs = pd.read_csv(protein_path)

# Normalise the identifier columns to strings (target_id is cast through int first).
ecs["target_id"] = ecs["target_id"].astype(int).astype(str)
ecs["chembl_id"] = ecs["chembl_id"].astype(str)

# TASK_ID is chembl_id with its first six characters removed
# (presumably the "CHEMBL" prefix -- TODO confirm against the CSV).
# A vectorised string slice replaces the row-wise apply: it yields the same values
# and also returns a proper (empty) column when the frame has no rows.
ecs["TASK_ID"] = ecs["chembl_id"].str[6:]

# DataFrame.merge defaults to an inner join, so only TASK_IDs present in both frames
# are kept. NOTE(review): `data` is rebound here, so re-running this cell merges the
# protein columns a second time; re-run the earlier cells first.
data = ecs.merge(data, on="TASK_ID")

In [None]:
# Per-task performance at support set size 16, with class 2 highlighted
# (presumably the EC number class, now available after the protein merge).
plot_task_performances_by_id(
    data,
    model_summaries,
    support_set_size=16,
    highlight_class=2,
)

## Plot for each task, comparing different models

In [None]:
# plot_all_assays(data, model_summaries.keys(), results_dir = plot_output_dir)

# Summarise the overall performance in box plots



In [None]:
# Box plots of the configured models' results at support set size 16.
box_plot(
    data,
    model_summaries,
    support_set_size=16,
)

## 2. Aggregate as a function of the number of training points, across all categories

In [7]:
# Aggregate the results of every configured model across the support set sizes.
# The resulting table (displayed in the next cell) is indexed by EC_category and
# includes an "all" row.
aggregate_df = get_aggregates_across_sizes(data, model_summaries)

In [8]:
# Display the aggregated table.
aggregate_df

Unnamed: 0_level_0,16_train (GNN-MAML),16_train (GNN-MT),16_train (ST),16_train (kNN),16_train (GNN-MAML) std,16_train (GNN-MT) std,16_train (ST) std,16_train (kNN) std,32_train (GNN-MAML),32_train (GNN-MT),...,128_train (ST) std,128_train (kNN) std,256_train (GNN-MAML),256_train (GNN-MT),256_train (ST),256_train (kNN),256_train (GNN-MAML) std,256_train (GNN-MT) std,256_train (ST) std,256_train (kNN) std
EC_category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3,0.104527,0.107777,0.157427,0.084977,0.02363,0.025198,0.027679,0.018686,0.122527,0.123577,...,0.034865,0.028164,0.12091,0.143624,0.230624,0.148624,0.035184,0.042559,0.055089,0.041259
2,0.176818,0.093034,0.080842,0.045994,0.009174,0.006513,0.006563,0.0046,0.183786,0.125186,...,0.00919,0.007428,0.222212,0.263793,0.279502,0.190825,0.022102,0.022518,0.017994,0.016464
1,0.054114,0.0524,0.079829,0.042686,0.02795,0.018057,0.029221,0.018306,0.057257,0.071686,...,0.043741,0.031359,0.034892,0.083392,0.153142,0.084642,0.012936,0.023856,0.032241,0.02449
4,0.213392,0.177892,0.190392,0.038892,0.144373,0.134873,0.173373,0.022873,0.219392,0.173392,...,0.157873,0.164873,,,,,,,,
5,0.016765,0.062765,0.116765,0.060765,0.037,0.06,0.041,0.036,0.066765,0.035765,...,0.026,0.025,,,,,,,,
7,-0.022119,0.072881,0.080881,0.060881,0.035,0.053,0.057,0.04,0.022881,0.051881,...,0.057,0.037,,,,,,,,
6,-0.010788,0.011212,0.020212,0.010212,0.013,0.036,0.057,0.021,-0.010788,0.004212,...,0.055,0.016,0.044212,0.165212,0.258212,0.145212,0.023,0.036,0.015,0.012
all,0.159122,0.093339,0.091791,0.050683,0.008703,0.006349,0.006948,0.004539,0.167785,0.121403,...,0.009076,0.007407,0.184156,0.225156,0.259296,0.173017,0.019545,0.020122,0.016714,0.014444


In [None]:
# plot_by_size has an option to plot all classes separately; it is not used here.
# plot_output_dir is passed through as the location for the generated figures.
plot_by_size(
    aggregate_df,
    model_summaries,
    plot_output_dir=plot_output_dir,
)

# Ranking

In [10]:
from autorank import autorank

# Select the data to rank with autorank: for one support set size, keep the columns
# whose names contain both "val" and "delta-auprc".
# A plain assignment is used rather than a loop over sizes, because `df` can only
# hold the selection for a single size at a time.
size = 16
# Anchor the numeric prefix with "_" so that a size such as 1 could not also match
# columns that start with "16_" or "128_".
size_prefix = f"{size}_"
df = data[
    [
        column
        for column in data.columns
        if column.startswith(size_prefix) and "val" in column and "delta-auprc" in column
    ]
]

In [11]:
# Run autorank on the selected columns, with verbose output switched off.
result = autorank(df, verbose=False)
# Show the "meanrank" column of the ranking table: one value per selected data column.
result.rankdf["meanrank"]

16_train (GNN-MAML) val delta-auprc    1.792994
16_train (ST) val delta-auprc          2.280255
16_train (GNN-MT) val delta-auprc      2.398089
16_train (kNN) val delta-auprc         3.528662
Name: meanrank, dtype: float64