# Computing the perplexities over the test set

In [1]:
import os
from helper import init_ipynb
envfound = init_ipynb()

DIR = os.environ["DIR_PATH"] if envfound else None
DEVICE = os.environ["DEVICE"] if envfound else None
API_KEY = os.environ["API_KEY"] if envfound else None
PLATFORM = os.environ["OS_TYPE"] if envfound else None

if(PLATFORM == "Darwin"):
    os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
%load_ext autoreload
%autoreload 2

In [2]:
from transformers import LlamaForCausalLM, AutoTokenizer
import torch
from torch.nn import CrossEntropyLoss
import datasets as hf_ds
from torch.nn.functional import one_hot
from typing import Dict
from tqdm.notebook import tqdm


testds = [
    hf_ds.load_from_disk("docs/pmc_patiens_fil_test.hf"),
    hf_ds.Dataset.from_list(hf_ds.load_dataset("cryptoni/epi_pubmed")["test"].train_test_split(.08, seed=42)["test"]["train"]).rename_column("abstract", "text"),
]

print(testds)



MODEL_MAX_LENGTH = 1024

def compute_loss(model: LlamaForCausalLM,
                 input_texts: Dict[str, str],
                 tokenizer: AutoTokenizer) -> torch.Tensor:
    """
        Override of loss computation.

        args : 
            - model (AutoModelForCausalLM) :
            - inputs :

        returns :
            loss value, torch tensor with grad_fn
    """
    VOCAB_SIZE = len(tokenizer)
    inputs = tokenizer(input_texts, max_length=MODEL_MAX_LENGTH, truncation=True, padding="max_length", return_tensors="pt").to(DEVICE) 
    predictions = model(**inputs).logits ## model run and extract logits
    loss = CrossEntropyLoss()(predictions.float(),
                              one_hot(
                                  inputs["input_ids"],
                                  num_classes=VOCAB_SIZE
                            ).float()
                    ).cpu() ## Loss computation, comparing the logits and the one hot distrib
    del input_texts
    del predictions
    return loss

[Dataset({
    features: ['text'],
    num_rows: 826
}), Dataset({
    features: ['text', 'title'],
    num_rows: 1241
})]


In [3]:
def ppl(mod: LlamaForCausalLM, test: hf_ds.Dataset, loss: bool=False):
    model = LlamaForCausalLM.from_pretrained(mod).to(DEVICE)
    tokenizer = AutoTokenizer.from_pretrained(mod)
    tokenizer.pad_token = tokenizer.eos_token

    losses = []
    for sample in tqdm(test["text"]):
        losses.append(
            compute_loss(model, [sample], tokenizer).detach()
        )

    del model
    del tokenizer
    return 2**(sum(losses) / len(losses)).cpu().item() if not loss else sum(losses) / len(losses)

## Meditron 7B

In [4]:
ppl_meditron_7b = ppl("epfl-llm/meditron-7b")

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

In [13]:
print("Loss for meditron 7b :", ppl_meditron_7b)

NameError: name 'ppl_meditron_7b' is not defined

## LLaMA 3

In [4]:
ppl_ll3 = ppl("meta-llama/Meta-Llama-3-8B", testds[1])

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/1241 [00:00<?, ?it/s]

In [5]:
print("PPL for LLaMA 3 on PubMed : ", ppl_ll3)

PPL for LLaMA 3 on PubMed :  1.0574300304045785


In [15]:
print("Loss for LLaMA 3 :", ppl_ll3)

Loss for LLaMA 3 : 1.0753576460522802


## Epitron LL3.PMCo.N3.E1

In [4]:
ppl_epitron_LL3_PMCo_N3_e1 = ppl("cryptoni/epitron_LL3_PMCo_N3_e1")

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

In [5]:
ppl_epitron_LL3_PMCo_N3_e1

1.0731268197411905

## Epitron LL3.PMCo.N3.E1

In [None]:
ppl_epitron_LL3_PMCo_N3_e1 = ppl("cryptoni/epitron_LL3_PMCo_N3_e1")

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

In [None]:
ppl_epitron_LL3_PMCo_N3_e1

1.0731268197411905

## Epitron BASELINE.M7B.PMCo.E1

## Epitron LL3.PMCo.E1.N3

In [None]:
ppl_epitron_LL3_PMCo_N2_e1 = ppl("cryptoni/epitron_LL3_PMCo_N2")

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

In [None]:
ppl_epitron_LL3_PMCo_N2_e1

1.0731268197411905

## Epitron BASELINE.M7B.PMCo.E1

In [None]:
ppl_epitron_M7B_PMCo_e1 = ppl("cryptoni/epitron_baseline_M7B_PMCo_e1")

In [11]:
print("Loss for EPITRON.M7B.PMCo.E1 :", ppl_epitron_M7B_PMCo_e1)

Loss for EPITRON.M7B.PMCo.E1 : 1.0552800471506047


## Epitron BASELINE.M7B.PMCo.E5

In [11]:
ppl_epitron_M7B_PMCo_e5 = ppl("cryptoni/epitron_baseline_PMCo_M7B")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

In [12]:
print("Loss for EPITRON.M7B.PMCo.E5 :", ppl_epitron_M7B_PMCo_e5)

Loss for EPITRON.M7B.PMCo.E5 : 1.0552412232446187


## Epitron BASELINE.M7B.PMCo.E3

In [13]:
ppl_epitron_M7B_PMCo_e3 = ppl("cryptoni/epitron_baseline_PMCo_M7B_e3")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

In [14]:
print("Loss for EPITRON.M7B.PMCo.E3 :", ppl_epitron_M7B_PMCo_e3)

Loss for EPITRON.M7B.PMCo.E3 : 1.0552418390534275


In [7]:
ppl_pubmed_pmc = ppl("cryptoni/epitron_pubmed_pmc", testds[1])

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/1241 [00:00<?, ?it/s]

In [8]:
print("PPL for LL3 trained on Pubmed and PMC on pubmed : ", ppl_pubmed_pmc)

PPL for LL3 trained on Pubmed and PMC on pubmed :  1.0482843168645704


In [9]:
### PPL OF THE BEST OF THE PMC ONLY
ppl_pmc_tr_pubmed_te = ppl("cryptoni/epitron_LL3_PMC_N6", testds[1])

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/1241 [00:00<?, ?it/s]

In [11]:
### PPL OF THE BEST OF THE PMC ONLY
ppl_pmc_tr_pmc_te = ppl("cryptoni/epitron_LL3_PMC_N6", testds[0])

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

In [12]:
print("PPL of LL3 trained on PMC on PMC : ", ppl_pmc_tr_pmc_te)

PPL of LL3 trained on PMC on PMC :  1.0721796112094717


In [10]:
print("PPL of LL3 trained on PMC on pubmed : ", ppl_pmc_tr_pubmed_te)

PPL of LL3 trained on PMC on pubmed :  1.0522408901251359


|model|ppl|dataset|
|:------:|:---------:|:-------:|
|Meditron-7B|1.35|pmc test|
|LLaMA-3-8B|1.075|pmc test|
|epitron.LL3.8B.PMCo.e1|1.073|pmc test|
|epitron.M7B.PMCo.e1|1.06|pmc test|
|epitron.M7B.PMCo.e3|1.055|pmc test|
|epitron.M7B.PMCo.e5|1.055|pmc test|


In [14]:
print("PPM on PMC trained on PMC and Pubmed", ppl("cryptoni/epitron_pubmed_pmc", testds[0]))

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

PPM on PMC trained on PMC and Pubmed 1.0704639387388293


### FULL PPLS ON PMCo

In [7]:
import json

In [8]:
MODELS = [
    "meta-llama/Meta-Llama-3-8B", ## BASE
    "cryptoni/epitron_LL3_final_N5_e1", ## N5-E1
    "cryptoni/epitron_LL3_final_N5_e2",
    "cryptoni/epitron_LL3_PMC_N5_e1",
    "cryptoni/epitron_LL3_PMCo_N2", ## N2-E1
    "cryptoni/epitron_LL3_N3_e1",
    "cryptoni/epitron_LL3_PMCo_N2",
]


all_ppls = {}

try :
    with open("docs/all_ppls.json", "r") as f:
        all_ppls = json.load(f)
except :
    print("No trial to resume.")

In [9]:
for model in MODELS:
    if(model not in all_ppls):
        print(f"Processing model : {model}")
        ppli = ppl(model)
        all_ppls[model] = ppli
    else :
        print(f"Already computed for {model} : {all_ppls[model]}")

Already computed for meta-llama/Meta-Llama-3-8B : 1.0753576460522802
Processing model : cryptoni/epitron_LL3_final_N5_e1


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

Processing model : cryptoni/epitron_LL3_final_N5_e2


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

Already computed for cryptoni/epitron_LL3_PMC_N5_e1 : 1.056741346558772
Already computed for cryptoni/epitron_LL3_PMCo_N2 : 1.0731268197411905
Processing model : cryptoni/epitron_LL3_N3_e1


model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/826 [00:00<?, ?it/s]

Already computed for cryptoni/epitron_LL3_PMCo_N2 : 1.0731268197411905


In [11]:
import json


with open("docs/all_ppls.json", "w") as f:
    json.dump(all_ppls, f)

In [10]:
all_ppls

{'meta-llama/Meta-Llama-3-8B': 1.0753576460522802,
 'cryptoni/epitron_LL3_PMC_N5_e1': 1.056741346558772,
 'cryptoni/epitron_LL3_PMCo_N2': 1.0731268197411905,
 'cryptoni/epitron_LL3_final_N5_e1': 1.0658709393412393,
 'cryptoni/epitron_LL3_final_N5_e2': 1.0412375565890175,
 'cryptoni/epitron_LL3_N3_e1': 1.0339525704571053}

### Validation with MCQ :

In [1]:
import os
from helper import init_ipynb
envfound = init_ipynb()

DIR = os.environ["DIR_PATH"] if envfound else None
DEVICE = os.environ["DEVICE"] if envfound else None
API_KEY = os.environ["API_KEY"] if envfound else None
PLATFORM = os.environ["OS_TYPE"] if envfound else None

if(PLATFORM == "Darwin"):
    os.environ['PYTORCH_MPS_HIGH_WATERMARK_RATIO'] = '0.0'
else :
    import vllm
%load_ext autoreload
%autoreload 2




from transformers import (AutoModelForCausalLM
                         ,LlamaForCausalLM
                         ,AutoTokenizer)
from models import (HF_LLM, GenerationArg, Model, OpenAIGPT)
import torch
import gc
from evaluation import MCQBenchmark
from models.qa_prompts import QA_PROMPTS
from typing import List
from vllm.distributed.parallel_state import destroy_model_parallel
import json
import nltk
nltk.download('punkt')





MARKER = "X00A"

def cot1_prompt_template_support(q: str, shots: List[str], stop_token:str="<|STOP|>") -> str:
    template = QA_PROMPTS["1cot_answer_align"]
    shots = '\n'.join(shots)
    return f"""-system:\n{template['system']}\n{shots}{template['q_form'].format(q=q)}""".replace("###", stop_token)

def cot1_prompt_template_gpt(q: str, shots: List[str]) -> List[str]:
    template = QA_PROMPTS["1cot_answer_align"]
    messages = [
        {
            "role": "system",
            "content": template["system"]
        }
    ]
    for shot in shots:
        contents = shot.replace("-user:", MARKER).replace("-assistant:", MARKER).split(MARKER)
        contents = [x.strip() for x in contents if x != ""]
        messages.append({
            "role": "user",
            "content": contents[0]
        })
        messages.append({
            "role": "assistant",
            "content": contents[1]
        })
    messages.append({
        "role": "user",
        "content": ("Question : " + q)
    })
    return messages



aes = MCQBenchmark(
    "docs/benchmarks/self_assessment/final_processed_valid.json",
    cot1_prompt_template_support,
    support_type="kNN",
    n_shots=3
)


all_generations = {}
MODELS = [
    ["meta-llama/Meta-Llama-3-8B", None ],
    ["cryptoni/epitron_LL3_final_N5_e1",None ],
    ["cryptoni/epitron_LL3_final_N5_e2",None ],
    ["cryptoni/epitron_LL3_PMC_N5_e1",None ],
    ["cryptoni/epitron_LL3_PMCo_N2", None ],
    ["cryptoni/epitron_LL3_N3_e1",None ],
    ["cryptoni/epitron_LL3_PMCo_N2",None ],
    ["cryptoni/epitron_LL3_PMC_N6",None ],
    ["cryptoni/epitron_LL3_PMC_N6_e2",None ],
    ["cryptoni/epitron_LL3_BOTH_N6", None ],
    ["cryptoni/epitron_LL3_PMC_N6", "cryptoni/epitron_sft_n6_full"],
    ["cryptoni/epitron_LL3_PMC_N6_e2", "cryptoni/SFT_LORA_epitron_N6_e2"],
    ["cryptoni/epitron_LL3_PMC_N6_e2", "cryptoni/SFT_LORA_epitron_N6_e3"],
    ["cryptoni/epitron_LL3_PMC_N6_e2", "cryptoni/SFT_LORA_epitron_N6_e4"],
    ["cryptoni/epitron_LL3_PMC_N6_e2", "cryptoni/SFT_LORA_epitron_N6_e5"],
    ["cryptoni/epitron_sft_nolora_n6_e2", None],
    ["meta-llama/Meta-Llama-3-8B-Instruct", None],
    ["cryptoni/epitron_pmc_mimic_n6", None],
    ["cryptoni/epitron_pubmed_pmc", None],
    ["cryptoni/epitron_pmc_pubmed_N5", None]
]

model, adapter = MODELS[-1]
llm = HF_LLM(
    model_name=model,
    arg=GenerationArg(
        temperature=0,
        topp=1,
        max_new_token=512,
        stop_seq="<|STOP|>",
        topk=1,
        use_vllm=True
    ),
    device=DEVICE,
    use_vllm=True,
    lora_path=adapter
)
llm.load()
all_generations[model] = aes.assess(llm)
print("Unloading the model")
destroy_model_parallel()
del llm
gc.collect()
torch.cuda.empty_cache()
torch.distributed.destroy_process_group()


to_store = aes.mcq.copy()
for gen, q in zip(all_generations[model], to_store):
    q["prediction"] = gen.outputs[0].text

fname = f"docs/validation_results_store/{model.split('/')[1]}_{'adapted_' + adapter.split('/')[1] + '_'}valid.json" if adapter is not None else f"docs/validation_results_store/{model.split('/')[1]}_valid.json"
with open(fname, "w") as f:
    json.dump(to_store, f)


We've set API key :  f0555591d0410671711554a53411c6d7


  warn(
[nltk_data] Downloading package punkt to
[nltk_data]     /home/antoinemagron/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


config.json:   0%|          | 0.00/709 [00:00<?, ?B/s]

INFO 06-26 14:00:46 llm_engine.py:100] Initializing an LLM engine (v0.4.2) with config: model='cryptoni/epitron_pmc_pubmed_N5', speculative_config=None, tokenizer='cryptoni/epitron_pmc_pubmed_N5', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.float16, max_seq_len=8192, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0, served_model_name=cryptoni/epitron_pmc_pubmed_N5)


tokenizer_config.json:   0%|          | 0.00/50.8k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/449 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


generation_config.json:   0%|          | 0.00/194 [00:00<?, ?B/s]

INFO 06-26 14:00:47 utils.py:660] Found nccl from library /home/antoinemagron/.config/vllm/nccl/cu12/libnccl.so.2.18.1
INFO 06-26 14:00:47 selector.py:27] Using FlashAttention-2 backend.
INFO 06-26 14:00:48 weight_utils.py:199] Using model weights format ['*.safetensors']


model-00007-of-00007.safetensors:   0%|          | 0.00/2.57G [00:00<?, ?B/s]

model-00001-of-00007.safetensors:   0%|          | 0.00/4.89G [00:00<?, ?B/s]

model-00005-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00002-of-00007.safetensors:   0%|          | 0.00/4.83G [00:00<?, ?B/s]

model-00006-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00004-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00007.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

INFO 06-26 14:10:31 model_runner.py:175] Loading model weights took 14.9595 GB
INFO 06-26 14:10:33 gpu_executor.py:114] # GPU blocks: 13243, # CPU blocks: 2048
INFO 06-26 14:10:35 model_runner.py:937] Capturing the model for CUDA graphs. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
INFO 06-26 14:10:35 model_runner.py:941] CUDA graphs can take additional 1~3 GiB memory per GPU. If you are running out of memory, consider decreasing `gpu_memory_utilization` or enforcing eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
INFO 06-26 14:10:41 model_runner.py:1017] Graph capturing finished in 6 secs.
[['-user:\nQuestion : Deep brain stimulation for drug-resistant epilepsy targets which of the following structures?\n        A. anterior nucleus of the thalamus\n        B. centromedian nucleus of the thalamus\n        C. cingulate gyrus\n      

Processed prompts: 100%|██████████| 78/78 [00:14<00:00,  5.32it/s]


Unloading the model


### Validation statistics :

In [2]:
MODELS = [
    "epitron_LL3_BOTH_N6",
    "Meta-Llama-3-8B", ## BASE
    "epitron_LL3_final_N5_e1", ## N5-E1
    "epitron_LL3_final_N5_e2",
    "epitron_LL3_PMC_N5_e1",
    "epitron_LL3_PMCo_N2", ## N2-E1
    "epitron_LL3_N3_e1",
    "epitron_LL3_PMCo_N2",
    "epitron_LL3_PMC_N6",
    "epitron_LL3_PMC_N6_e2",
    "epitron_LL3_PMC_N6_adapted_epitron_sft_n6_full",
    "epitron_LL3_PMC_N6_e2_adapted_SFT_LORA_epitron_N6_e2",
    "epitron_LL3_PMC_N6_e2_adapted_SFT_LORA_epitron_N6_e3",
    "epitron_LL3_PMC_N6_e2_adapted_SFT_LORA_epitron_N6_e4",
    "epitron_LL3_PMC_N6_e2_adapted_SFT_LORA_epitron_N6_e5",
    "epitron_sft_nolora_n6_e2",
    "Meta-Llama-3-8B-Instruct",
    "epitron_pmc_mimic_n6",
    "epitron_pubmed_pmc",
    "epitron_pmc_pubmed_N5"
]



FLAGS = ["Therefore", "correct", "answer"]
def match_score(sentence):
    return len([flag for flag in FLAGS if flag in sentence]) / len(FLAGS)

def simple_extract(ans_sentence):
    sel = [l for l in ["A", "B", "C", "D", "E"] if l in ans_sentence]
    return sel[0] if sel != [] else "-1"

def csa2(pred):
    if(type(pred) != str):
        pred = pred.outputs[0].text
    sent_text = nltk.sent_tokenize(pred.replace("\n", "??"))
    sentence_score = sorted([[sent, match_score(sent)] for sent in sent_text], key=lambda _: _[1], reverse=True)
    if(len(sentence_score) == 0):
        return "-1"
    sentence, score = sentence_score[0]
    if(score == 0):
        return "-1"
    return simple_extract(sentence)


gens = {}


for model in MODELS:
    if(os.path.exists(f"docs/validation_results_store/{model}_valid.json")):
        print(f"- loading results for {model}")
        with open(f"docs/validation_results_store/{model}_valid.json", "r") as f:
            gens[model] = json.load(f)
    else :
        print(f"- no stored results for {model}")
    
scores = {}
complete_scores = []
for model, gen in gens.items():
    print(f"- {model} : ")
    print("Score on full AES final : ", round(score := sum([(csa2(g["prediction"]) == g["answer"]) for g in gen]) * 100 / len(gen), 2), "%")
    complete_scores.append(score)

- loading results for epitron_LL3_BOTH_N6
- loading results for Meta-Llama-3-8B
- loading results for epitron_LL3_final_N5_e1
- loading results for epitron_LL3_final_N5_e2
- loading results for epitron_LL3_PMC_N5_e1
- loading results for epitron_LL3_PMCo_N2
- loading results for epitron_LL3_N3_e1
- loading results for epitron_LL3_PMCo_N2
- loading results for epitron_LL3_PMC_N6
- loading results for epitron_LL3_PMC_N6_e2
- loading results for epitron_LL3_PMC_N6_adapted_epitron_sft_n6_full
- loading results for epitron_LL3_PMC_N6_e2_adapted_SFT_LORA_epitron_N6_e2
- loading results for epitron_LL3_PMC_N6_e2_adapted_SFT_LORA_epitron_N6_e3
- loading results for epitron_LL3_PMC_N6_e2_adapted_SFT_LORA_epitron_N6_e4
- loading results for epitron_LL3_PMC_N6_e2_adapted_SFT_LORA_epitron_N6_e5
- loading results for epitron_sft_nolora_n6_e2
- loading results for Meta-Llama-3-8B-Instruct
- loading results for epitron_pmc_mimic_n6
- loading results for epitron_pubmed_pmc
- loading results for epitro

## Quick check of LLaMA-3-8B

In [7]:
import json


paths = [
    "docs/validation_results_store/epitron_LL3_PMC_N6_e2_valid.json",
    "docs/validation_results_store/Meta-Llama-3-8B_valid.json",
    "docs/validation_results_store/epitron_pubmed_pmc_valid.json"
]

gens = []
for path in paths :
    with open(path, "r") as f:
        gens.append(json.load(f))

for g1, g2, g3 in zip(*gens):
    print("*"*100)
    print("BEST " + "*"*30)
    print(g1["prediction"])
    print("BASE " + "*"*30)
    print(g2["prediction"])
    print("PMC PUBMED " + "*"*30)
    print(g3["prediction"])
    print("CORRECT ANSWER : " + g2["answer"])

****************************************************************************************************
BEST ******************************
 Autosomal dominant epilepsy with auditory features is most often associated with a pathogenic variant in the LGI1 gene. This is an autosomal dominant disorder characterized by febrile seizures, ataxia, and auditory hallucinations. 
Therefore, the correct answer is C.
BASE ******************************
 Autosomal dominant epilepsy with auditory features is most often associated with a pathogenic variant in the LGI1 gene. This is an autosomal dominant disorder characterized by febrile seizures, ataxia, and auditory hallucinations. The seizures are typically generalized tonic-clonic or complex partial in nature. 
Therefore, the correct answer is C.
PMC PUBMED ******************************
 Autosomal dominant epilepsy with auditory features is most often associated with a pathogenic variant in the LGI1 gene. This is an autosomal dominant disorder chara

## PPLS 



##### PPL on PubMed Test :

- base : 1.05743
- trained on PMC : 1.0522
- trained on PMC pubmed : 1.0483


#### PPL on PMC

- base : 1.07536
- 