# Find best models

Find the best model for each trait.

In [1]:
import sys

sys.path.append("..")

In [2]:
from dataclasses import dataclass, asdict
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from ipywidgets import interact
from tqdm import tqdm

from phenobase.pylib.binary_metrics import Metrics
from phenobase.pylib.util import TRAITS

In [3]:
CSV = Path("..") / "data" / "thresholds.csv"

DF = pd.read_csv(CSV)
DF.shape

(18000, 13)

In [17]:
FRACT_CUTOFF = 0.75
RECALL_CUTOFF = 0.5
FP_CUTOFF = 0.05

## Flowers

In [23]:
df_fl = DF.loc[(DF["fract"] >= FRACT_CUTOFF) & (DF["trait"] == "flowers")] & (DF["recall"] >= RECALL_CUTOFF)]
df_fl = df_fl.loc[(df_fl["fp"] / df_fl["tp"]) <= FP_CUTOFF]
df_fl.shape

(0, 13)

In [6]:
best = df_fl["ppv"].max()
print(f"best = {best}")
df_fl = df_fl.loc[df_fl["ppv"] == best]
df_fl.shape

best = 0.9371980676328504


(1, 13)

In [7]:
most = df_fl["fract"].max()
print(f"most = {most}")
df_fl = df_fl.loc[df_fl["fract"] == most]
df_fl.shape

most = 0.7836919592298981


(1, 13)

In [8]:
print(
    f"tp = {df_fl.iloc[0]['tp']:4.0f}    fn = {df_fl.iloc[0]['fn']:4.0f}\n"
    f"fp = {df_fl.iloc[0]['fp']:4.0f}    tn = {df_fl.iloc[0]['tn']:4.0f}"
)

df_fl.head()

tp =  194    fn =  187
fp =   13    tn =  298


Unnamed: 0,checkpoint,trait,threshold,total,ge_threshold,fract,tp,tn,fp,fn,accuracy,recall,ppv
9784,data/local/effnet_456_flowers_pos/checkpoint-1075,flowers,0.84,883,692.0,0.783692,194.0,298.0,13.0,187.0,0.710983,0.509186,0.937198


## Fruits

In [24]:
df_fr = DF.loc[(DF["fract"] >= FRACT_CUTOFF) & (DF["trait"] == "fruits") & (DF["recall"] >= RECALL_CUTOFF)]
df_fr = df_fr.loc[(df_fl["fp"] / df_fr["tp"]) <= FP_CUTOFF]
df_fr.shape

(0, 13)

In [10]:
best = df_fr["ppv"].max()
print(f"best = {best}")
df_fr = df_fr.loc[df_fr["ppv"] == best]
df_fr.shape

best = 0.8552036199095022


(1, 13)

In [11]:
most = df_fr["fract"].max()
print(f"most = {most}")
df_fr = df_fr.loc[df_fr["fract"] == most]
df_fr.shape

most = 0.77


(1, 13)

In [12]:
print(
    f"tp = {df_fr.iloc[0]['tp']:4.0f}    fn = {df_fr.iloc[0]['fn']:4.0f}\n"
    f"fp = {df_fr.iloc[0]['fp']:4.0f}    tn = {df_fr.iloc[0]['tn']:4.0f}"
)

df_fr.head()

tp =  189    fn =  127
fp =   32    tn =  268


Unnamed: 0,checkpoint,trait,threshold,total,ge_threshold,fract,tp,tn,fp,fn,accuracy,recall,ppv
5945,data/tuned/vit_384_base_hf/checkpoint-437,fruits,0.95,800,616.0,0.77,189.0,268.0,32.0,127.0,0.741883,0.598101,0.855204


## Leaves

In [25]:
df_lf = DF.loc[(DF["fract"] >= FRACT_CUTOFF) & (DF["trait"] == "leaves") & (DF["recall"] >= RECALL_CUTOFF)]
df_lf = df_lf.loc[(df_fl["fp"] / df_lf["tp"]) <= FP_CUTOFF]
df_lf.shape

(0, 13)

In [14]:
best = df_lf["ppv"].max()
print(f"best = {best}")
df_lf = df_lf.loc[df_lf["ppv"] == best]
df_lf.shape

best = 1.0


(16, 13)

In [15]:
most = df_lf["fract"].max()
print(f"most = {most}")
df_lf = df_lf.loc[df_lf["fract"] == most]
df_lf.shape

most = 1.0


(1, 13)

In [16]:
print(
    f"tp = {df_lf.iloc[0]['tp']:4.0f}    fn = {df_lf.iloc[0]['fn']:4.0f}\n"
    f"fp = {df_lf.iloc[0]['fp']:4.0f}    tn = {df_lf.iloc[0]['tn']:4.0f}"
)

df_lf.head()

tp =  561    fn =  386
fp =    0    tn =   39


Unnamed: 0,checkpoint,trait,threshold,total,ge_threshold,fract,tp,tn,fp,fn,accuracy,recall,ppv
17950,data/tuned/vit_384_lg_leaves_prec_wt/checkpoin...,leaves,0.5,986,986.0,1.0,561.0,39.0,0.0,386.0,0.608519,0.592397,1.0
