## Collecting Evaluations from Google Sheets

Aggregates evaluations, replacing every contributor's name with a SHA-256 hash. Also joins with additional metadata
about each definition, including its source.

In [1]:
import pandas as pd
import gspread
from google.oauth2.service_account import Credentials
# gspread client used further down (as `gc`) to open each evaluator's spreadsheet.
# Called without arguments, so gspread looks for the service-account key in its
# default location rather than in this repository.
gc = gspread.service_account()

In [2]:
# Build a second, explicitly scoped client from the service-account key kept under secrets/.
# NOTE(review): the cells visible here open spreadsheets through `gc`, not `client` —
# confirm whether this client is still needed.
scopes = [
    "https://spreadsheets.google.com/feeds",
    "https://www.googleapis.com/auth/spreadsheets",
    "https://www.googleapis.com/auth/drive.file",
    "https://www.googleapis.com/auth/drive",
]
creds = Credentials.from_service_account_file(
    "secrets/linkml-336707-1c85304108ea.json",
    scopes=scopes,
)
client = gspread.authorize(creds)


In [3]:
import time

In [4]:
# One evaluator identifier per line. The file is closed as soon as it has been read,
# and blank lines are skipped so they cannot turn into an empty evaluator name
# (which would later be used to build a spreadsheet title).
with open("definitions-sheets/evaluator-ids.txt") as ids_file:
    names = [line.strip() for line in ids_file if line.strip()]

In [5]:
# Columns that hold a 1-5 rating and therefore get normalised and validated.
RATING_COLUMNS = ["accuracy", "internal_consistency", "score", "confidence"]
VALID_RATINGS = range(1, 6)


def _normalize_rating(value):
    """
    Coerce one raw rating cell towards an integer rating.

    "?" becomes the empty string, floats are rounded to the nearest integer, and
    strings containing "-" (some evaluators customised their form) are reduced to
    the integer in front of the first "-". Any other value is returned unchanged.
    A "-" string whose prefix is not an integer is also returned unchanged, so the
    caller can report it as a bad value instead of the whole run aborting.
    """
    if value == "?":
        return ""
    if isinstance(value, float):
        return round(value)
    if isinstance(value, str) and "-" in value:
        try:
            return int(value.split("-")[0])
        except ValueError:
            return value
    return value


# Collect the filled-in rows of every evaluator's sheet into one frame (`eval_df`).
# `dfs` keeps the per-evaluator frames as well; `eval_df` is built from `all_rows`.
dfs = []
all_rows = []
for name in names:
    wks = gc.open(f"Definition Evaluation Template - {name}").worksheet("Definitions (EDIT HERE)")
    rows = wks.get_all_records()
    # Keep only the rows this evaluator actually rated.
    rows = [row for row in rows if row["score"] or row["accuracy"]]
    propagated = 0
    for row in rows:
        row["evaluator"] = name
        for k in RATING_COLUMNS:
            v = _normalize_rating(row[k])
            if v and v not in VALID_RATINGS:
                print(f"BAD VALUE {v} in {row['definition']}")
                v = None
            # Store the normalised value back on the row; previously it was computed
            # and then dropped, so "?" and "3-..." style entries were left as-is.
            row[k] = v
        # Evaluators who only filled in accuracy get that value as their score.
        if not row["score"] and row["accuracy"]:
            propagated += 1
            row["score"] = row["accuracy"]
    all_rows.extend(rows)
    wks_df = pd.DataFrame(rows)
    wks_df["evaluator"] = name
    # Pause between spreadsheets — presumably to stay within the Sheets API rate limit.
    time.sleep(3)

    dfs.append(wks_df)
eval_df = pd.DataFrame(all_rows)

#eval_df["evaluator"].unique()

BAD VALUE   in A structural abnormality in which the sperm neck is bent or curved.


In [6]:
# Number of collected evaluation rows per ontology.
eval_df.groupby("ontology").size()

ontology
cl        1917
envo      1151
foodon    1223
go         917
hp         355
mondo      472
mp         265
oba        236
uberon     471
dtype: int64

In [7]:
# Map "_x"-suffixed column names back to their plain names.
# NOTE(review): the preceding cell already groups by a plain "ontology" column, so these
# suffixed columns may not exist and this rename may be a no-op — confirm. Also note
# that "internal_consistency_x" maps to "consistency", not "internal_consistency".
eval_df = eval_df.rename(
    columns={
        "label_x": "label",
        "ontology_x": "ontology",
        "accuracy_x": "accuracy",
        "score_x": "score",
        "internal_consistency_x": "consistency",
        "confidence_x": "confidence",
        "notes_x": "notes",
    }
)

In [8]:
# Force the rating columns to numeric dtype; anything unparseable becomes NaN.
numeric_columns = ["score", "accuracy", "internal_consistency"]
eval_df[numeric_columns] = eval_df[numeric_columns].apply(pd.to_numeric, errors="coerce")

In [9]:
import hashlib
# Replace each evaluator name with the hex SHA-256 digest of its encoded bytes.
# Re-running this cell hashes the digests again, so run it once per fresh `eval_df`.
eval_df["evaluator"] = eval_df["evaluator"].map(
    lambda evaluator_name: hashlib.sha256(evaluator_name.encode()).hexdigest()
)

## Save as CSV for later analysis

The combined CSV is analyzed in a separate notebook.

In [10]:
# Write the aggregated evaluations (evaluator column already hashed) without the index column.
eval_df.to_csv("definitions-sheets/combined.csv", index=False)

## Load definitions plus sources


In [17]:
def get_method(obj: dict) -> str:
    """
    Classify how a definition was generated, based on a run's YAML metadata.

    Returns "background" when ``generate_background`` is truthy. Otherwise the single
    entry of ``additional_collections`` decides: a name starting with "gh_" gives
    "github", one starting with "devdocs" gives "devdocs". With no additional
    collections the result is "RAG".

    NOTE(review): saved outputs further down this notebook show labels such as
    "RAG+background" and "RAG+github"; confirm which naming is current.

    :param obj: metadata mapping loaded from the run's YAML file
    :return: the method label
    :raises AssertionError: if there is more than one additional collection, or its
        name matches no known prefix
    """
    if obj.get("generate_background"):
        return "background"
    additional = obj.get("additional_collections", [])
    if not additional:
        return "RAG"
    # Raised explicitly (rather than via `assert`) so the check survives `python -O`
    # and the message names the offending value.
    if len(additional) != 1:
        raise AssertionError(f"expected exactly one additional collection, got {additional!r}")
    collection = additional[0]
    if collection.startswith("gh_"):
        return "github"
    if collection.startswith("devdocs"):
        return "devdocs"
    raise AssertionError(f"unrecognised additional collection: {collection!r}")

In [65]:
import glob
import yaml
from pathlib import Path
def _definitions_frame(res, ont, definition_column, model_name, method):
    """Build one tidy frame of definitions taken from `definition_column` of a results table."""
    return pd.DataFrame(
        {
            "masked_id": res["masked_id"],
            "label": res["feature_label"],
            "ontology": ont,
            "definition": res[definition_column],
            "model_name": model_name,
            "method": method,
        }
    )


def load_results(files):
    """
    Combine predicted and curator-written definitions from result TSV files.

    Each TSV must have a sidecar YAML file (same path with a ``.yaml`` suffix) that
    supplies ``source_collection``, ``model_name`` and the fields read by
    ``get_method``; files without one are skipped. For every file two sets of rows
    are produced: the ``predicted_definition`` column labelled with the model name
    and method, and the ``expected_definition`` column labelled "human" / "curator".

    :param files: iterable of TSV paths (str or Path)
    :return: DataFrame with columns masked_id, label, ontology, definition,
        model_name, method, with exact duplicate rows removed; empty (with those
        columns) when no file could be loaded
    """
    columns = ["masked_id", "label", "ontology", "definition", "model_name", "method"]
    frames = []
    for file in files:
        yaml_path = Path(file).with_suffix(".yaml")
        if not yaml_path.exists():
            continue
        # Close the metadata file as soon as it is parsed.
        with open(yaml_path) as meta_file:
            meta = yaml.safe_load(meta_file)
        ont = meta.get("source_collection", "").replace("ont_", "")
        res = pd.read_csv(file, comment="#", sep="\t")
        predicted = _definitions_frame(
            res, ont, "predicted_definition", meta["model_name"], get_method(meta)
        )
        curated = _definitions_frame(res, ont, "expected_definition", "human", "curator")
        # The same curated definition can appear several times within one results
        # file; keep one copy per (label, ontology, definition).
        curated = curated.drop_duplicates(subset=["label", "ontology", "definition"])
        frames.append(predicted)
        frames.append(curated)
    if not frames:
        # pd.concat raises on an empty list; return an empty frame with the same columns.
        return pd.DataFrame(columns=columns)
    return pd.concat(frames).drop_duplicates()

# Try the loader on a single results file; as the cell's last expression the returned frame is displayed.
load_results(["results/ont_foodon-Pdefinition-Mid.original_id-BGFalse-TrNone-Te50-Mgpt-3.5-turbo-EMopenai.results.tsv"])

Unnamed: 0,masked_id,label,ontology,definition,model_name,method
0,PignutHickoryNut,pignut hickory nut,foodon,A caryopsis fruit of a pignut hickory cultivar...,gpt-3.5-turbo,RAG
1,GreekYogurt,greek yogurt,foodon,Yogurt made by straining whey from the yogurt....,gpt-3.5-turbo,RAG
2,TurkeyMeat_ground_sauteed_,"turkey meat (ground, sauteed)",foodon,Ground and sauteed meat made from turkey parts.,gpt-3.5-turbo,RAG
3,Raw,raw,foodon,,gpt-3.5-turbo,RAG
4,SpeltFoodProduct,spelt food product,foodon,,gpt-3.5-turbo,RAG
...,...,...,...,...,...,...
45,MalnutritionSupplementFoodProduct,malnutrition supplement food product,foodon,A food supplement food product designed to all...,human,curator
46,DuckEggYolk_raw_,duck egg yolk (raw),foodon,,human,curator
47,Parsnip_peeled_,parsnip (peeled),foodon,A parsnip which has been peeled.,human,curator
48,GreenCardamomSeed_dried_,green cardamom seed (dried),foodon,,human,curator


In [66]:
# glob returns matches in a filesystem-dependent order; sort them so that the row
# order of `res_df` (and of the CSV written from it) is the same on every run.
files = sorted(glob.glob("results/*Pdefinition*.results.tsv"))
res_df = load_results(files)

In [67]:
# Display the combined definitions frame.
res_df

Unnamed: 0,masked_id,label,ontology,definition,model_name,method
0,PignutHickoryNut,pignut hickory nut,foodon,A caryopsis fruit of a pignut hickory tree (Ca...,gpt-3.5-turbo,RAG+background
1,GreekYogurt,greek yogurt,foodon,Greek yogurt is a type of yogurt that is made ...,gpt-3.5-turbo,RAG+background
2,TurkeyMeat_ground_sauteed_,"turkey meat (ground, sauteed)",foodon,Ground sautéed turkey meat is a specific type ...,gpt-3.5-turbo,RAG+background
3,Raw,raw,foodon,,gpt-3.5-turbo,RAG+background
4,SpeltFoodProduct,spelt food product,foodon,,gpt-3.5-turbo,RAG+background
...,...,...,...,...,...,...
45,DrySoilAlbedo,dry soil albedo,envo,Soil albedo that specifically focuses on the r...,gpt-3.5-turbo,RAG+background
46,PediatricIntensiveCareUnitFacility,pediatric intensive care unit facility,envo,A specialized healthcare facility that provide...,gpt-3.5-turbo,RAG+background
47,NestOfAlligator,nest of alligator,envo,A specific physical structure or location wher...,gpt-3.5-turbo,RAG+background
48,ConcentrationOfCarbon13AtomInSeaWater,concentration of carbon-13 atom in sea water,envo,The concentration of carbon-13 atom when measu...,gpt-3.5-turbo,RAG+background


In [68]:
# Sanity check: which ontologies are present in the combined definitions.
res_df["ontology"].unique()

array(['foodon', 'go', 'cl', 'mondo', 'oba', 'obi', 'hp', 'envo',
       'uberon', 'mp'], dtype=object)

In [69]:
# Sanity check: which method labels are present in the combined definitions.
res_df["method"].unique()

array(['RAG+background', 'curator', 'RAG', 'RAG+github', 'RAG+devdocs'],
      dtype=object)

In [70]:
# Sanity check: which model names are present ("human" marks curator-written definitions).
res_df["model_name"].unique()

array(['gpt-3.5-turbo', 'human', 'gpt-4', 'nous-hermes-13b'], dtype=object)

In [71]:
# Write the combined definitions (with their model and method labels) without the index column.
res_df.to_csv("definitions-sheets/combined-definitions.csv", index=False)