# Accumulate scores

Take the raw scores produced by `score_models.py` and, for each checkpoint and trait, tally the results over a range of score thresholds.

In [1]:
import sys

# Put the parent directory on the import path (presumably so the `phenobase`
# package imported in the next cell resolves when the notebook is run from its
# own folder). The membership check keeps the cell idempotent: re-running it
# does not pile up duplicate ".." entries in sys.path.
if ".." not in sys.path:
    sys.path.append("..")

In [2]:
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Optional

import numpy as np
import pandas as pd
from tqdm import tqdm

from phenobase.pylib import util
from phenobase.pylib.binary_metrics import Metrics

In [3]:
# Both the input scores and the output tallies live in the sibling data folder.
DATA_DIR = Path("..") / "data"
CSV = DATA_DIR / "score.csv"
OUT = DATA_DIR / "thresholds.csv"

# Load every score row once, then list the distinct values of the "pretrained"
# column; each value identifies one checkpoint to be tallied separately.
DF = pd.read_csv(CSV)
PRETRAINED = DF["pretrained"].unique()

In [4]:
@dataclass
class Thresholds:
    """Tally for one checkpoint and one trait at a single score threshold.

    One instance is created per (checkpoint, trait, threshold) combination and
    converted to a dict with ``asdict`` so the records can be collected into a
    DataFrame.
    """

    # Value of the "pretrained" column identifying the checkpoint.
    checkpoint: str
    # Name of the trait being scored (one of util.TRAITS).
    trait: str
    # Upper threshold handed to Metrics.remove_equivocal; None until set.
    threshold: Optional[float] = None
    # Number of score rows available for this checkpoint.
    total: int = 0
    # Metrics.total after equivocal scores were removed at this threshold.
    ge_threshold: int = 0
    # ge_threshold divided by total.
    fract: float = 0.0
    # Counts copied from the Metrics object (presumably the confusion-matrix
    # cells: true/false positives and negatives -- confirm against Metrics).
    tp: int = 0
    tn: int = 0
    fp: int = 0
    fn: int = 0

In [5]:
# Spacing between consecutive thresholds in the sweep.
step = 0.01
# One dict per (checkpoint, trait, threshold); collected into a DataFrame later.
thresholds = []

# np.arange with a fractional step builds its values with floating-point
# arithmetic, so a cut-off meant to be 0.57 may come out as 0.5700000000000001.
# Rounding snaps every grid point to the nearest clean decimal, so both the
# value given to the metrics and the value written to the CSV are exact.
threshold_grid = np.round(np.arange(0.5, 1.0, step), decimals=6)

for checkpoint in tqdm(PRETRAINED):
    # Rows scored with this checkpoint only.
    df = DF.loc[DF["pretrained"] == checkpoint, :]
    n_rows = df.shape[0]  # invariant across traits and thresholds

    for trait in util.TRAITS:
        metrics = Metrics()
        metrics.y_true = df[f"{trait}_true"]
        metrics.y_pred = df[f"{trait}_pred"]

        # NOTE(review): the same Metrics object is reused for every threshold,
        # in increasing order. Whether remove_equivocal filters from the
        # original scores or from the previous result is not visible here --
        # confirm against phenobase.pylib.binary_metrics.
        for threshold_hi in threshold_grid:
            metrics.remove_equivocal(threshold_hi=threshold_hi)
            kept = metrics.total
            rec = Thresholds(
                checkpoint=checkpoint,
                trait=trait,
                threshold=threshold_hi,
                total=n_rows,
                ge_threshold=kept,
                # An empty slice (e.g. a NaN checkpoint value, which never
                # compares equal) would otherwise divide by zero.
                fract=kept / n_rows if n_rows else 0.0,
                tp=metrics.tp,
                tn=metrics.tn,
                fp=metrics.fp,
                fn=metrics.fn,
            )
            thresholds.append(asdict(rec))

100%|███████████████████████████████████████████████████████████████████████████████████████| 120/120 [00:03<00:00, 33.30it/s]


In [6]:
# Turn the accumulated per-threshold records into a table, one row per record.
df2 = pd.DataFrame(data=thresholds)

# Show the first few rows as the cell's output.
df2.head(n=5)

Unnamed: 0,checkpoint,trait,threshold,total,ge_threshold,fract,tp,tn,fp,fn,accuracy,recall,ppv
0,data/tuned/effnet_528_f1/checkpoint-4736,flowers,0.5,800,800.0,1.0,402.0,205.0,153.0,40.0,0.75875,0.909502,0.724324
1,data/tuned/effnet_528_f1/checkpoint-4736,flowers,0.51,800,798.0,0.9975,401.0,205.0,152.0,40.0,0.759398,0.909297,0.725136
2,data/tuned/effnet_528_f1/checkpoint-4736,flowers,0.52,800,797.0,0.99625,401.0,205.0,151.0,40.0,0.760351,0.909297,0.726449
3,data/tuned/effnet_528_f1/checkpoint-4736,flowers,0.53,800,796.0,0.995,400.0,205.0,151.0,40.0,0.76005,0.909091,0.725953
4,data/tuned/effnet_528_f1/checkpoint-4736,flowers,0.54,800,796.0,0.995,400.0,205.0,151.0,40.0,0.76005,0.909091,0.725953


In [7]:
# Save the threshold table to OUT; the row index is left out of the file.
df2.to_csv(path_or_buf=OUT, index=False)