In [1]:
import json
from pathlib import Path

# Experiment identifier; every output path below is derived from it.
exp_name = 'HF-43'


def _checkpoint_step(checkpoint_dir):
    """Return the training step encoded in a ``checkpoint-<step>`` directory name.

    Returns -1 when the name carries no numeric suffix, so such entries never
    win the "latest checkpoint" selection below.
    """
    suffix = checkpoint_dir.name.rsplit("-", 1)[-1]
    return int(suffix) if suffix.isdigit() else -1


# Per-fold best validation metric and the checkpoint path that achieved it,
# both read from the `trainer_state.json` stored in the fold's latest checkpoint.
best_metrics = []
best_checkpoints = []

for fold in range(5):
    folder = Path(f"../output/{exp_name}-fold{fold}")
    candidates = list(folder.glob("checkpoint*"))
    if not candidates:
        raise FileNotFoundError(f"No checkpoint directories found in {folder}")

    # Select the latest checkpoint by *numeric* step. A lexicographic sort
    # would rank "checkpoint-950" after "checkpoint-2100" and silently read a
    # stale trainer state.
    checkpoint = max(candidates, key=_checkpoint_step)

    with open(checkpoint/"trainer_state.json", "r") as fp:
        data = json.load(fp)
    best_metrics.append(data["best_metric"])
    best_checkpoints.append(data["best_model_checkpoint"])

print(best_metrics)
# Mean of the per-fold best metrics; left as the last expression so the
# notebook displays it.
average = sum(best_metrics)/len(best_metrics)
average

[0.5772432088851929, 0.5771093368530273, 0.5714321732521057, 0.5780883431434631, 0.5812585949897766]


0.5770263314247132

In [2]:
# Display the per-fold best checkpoint paths; the inference loop later loads
# `pytorch_model.bin` from each of these directories.
best_checkpoints

['../output/HF-43-fold0/checkpoint-2100',
 '../output/HF-43-fold1/checkpoint-1950',
 '../output/HF-43-fold2/checkpoint-2050',
 '../output/HF-43-fold3/checkpoint-1800',
 '../output/HF-43-fold4/checkpoint-2000']

In [19]:
# When True, the data-loading cell subsamples 100 rows of train.csv.
DEBUG = False

# Single source of truth for preprocessing and Trainer settings.
# `trainingargs` is unpacked verbatim into `TrainingArguments(**...)`.
cfg = dict(
    num_proc=2,
    aug_prob=0.05,
    k_folds=5,
    max_length=2048,
    padding=False,
    stride=0,
    data_dir="../input/feedback-prize-effectiveness",
    # Path of a previously saved tokenized dataset, or None to tokenize from scratch.
    load_from_disk=None,
    pad_multiple=8,
    model_name_or_path="../output/HF-pret-7-fold0/checkpoint-16605/",
    dropout=0.0,
    trainingargs=dict(
        output_dir=f"../output/{exp_name}",
        do_train=True,
        do_eval=True,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=4,
        learning_rate=1.2e-5,
        # label_smoothing_factor=0.05,
        weight_decay=0.01,
        num_train_epochs=2.2,
        warmup_ratio=0.1,
        optim='adamw_torch',
        logging_steps=25,
        save_strategy="steps",
        save_steps=25,
        evaluation_strategy="steps",
        eval_steps=25,
        eval_delay=600,
        report_to="wandb",
        group_by_length=True,
        save_total_limit=1,
        metric_for_best_model="loss",
        greater_is_better=False,
        seed=42,
        fp16=True,
        gradient_checkpointing=True,
        gradient_accumulation_steps=1,
    ),
)

In [20]:
import re
import pickle
import codecs
import warnings
import logging
from functools import partial
from pathlib import Path
from itertools import chain
from text_unidecode import unidecode
from typing import Any, Optional, Tuple

import pandas as pd
from sklearn.model_selection import KFold
from transformers import AutoTokenizer, set_seed

from datasets import Dataset, load_from_disk

def replace_encoding_with_utf8(error: UnicodeError) -> Tuple[bytes, int]:
    """Codec error handler for encoding failures.

    Encodes the offending slice of the input as UTF-8 instead and tells the
    codec to resume right after that slice.

    Args:
        error: the encode error raised by the codec; its ``object``, ``start``
            and ``end`` attributes delimit the text that could not be encoded.

    Returns:
        ``(replacement_bytes, resume_position)`` as required by
        ``codecs.register_error``.
    """
    offending_text = error.object[error.start : error.end]
    resume_at = error.end
    return offending_text.encode("utf-8"), resume_at

def replace_decoding_with_cp1252(error: UnicodeError) -> Tuple[str, int]:
    """Codec error handler for decoding failures.

    Decodes the offending byte slice as cp1252 instead and tells the codec to
    resume right after that slice.

    Args:
        error: the decode error raised by the codec; its ``object``, ``start``
            and ``end`` attributes delimit the bytes that could not be decoded.

    Returns:
        ``(replacement_text, resume_position)`` as required by
        ``codecs.register_error``.
    """
    bad_bytes = error.object[error.start : error.end]
    try:
        decoded = bad_bytes.decode("cp1252")
    except UnicodeDecodeError:
        # cp1252 leaves 0x81, 0x8D, 0x8F, 0x90 and 0x9D undefined, so the
        # handler itself used to raise on them. Decode byte by byte and map
        # the undefined ones to the code point of the same value; all other
        # bytes still get their normal cp1252 character.
        decoded = "".join(
            bytes([byte]).decode("cp1252", errors="ignore") or chr(byte)
            for byte in bad_bytes
        )
    return decoded, error.end

# Make both handlers available under the names used by the `errors=` arguments
# in `resolve_encodings_and_normalize`.
for _handler_name, _handler in (
    ("replace_encoding_with_utf8", replace_encoding_with_utf8),
    ("replace_decoding_with_cp1252", replace_decoding_with_cp1252),
):
    codecs.register_error(_handler_name, _handler)

def resolve_encodings_and_normalize(text: str) -> str:
    """Repair mixed UTF-8 / cp1252 text and pass the result through `unidecode`.

    The text goes through an encode/decode round trip that relies on the two
    custom codec error handlers registered in this notebook: bytes that are
    not valid UTF-8 are read as cp1252, and characters that cp1252 cannot
    represent are kept as UTF-8.

    Args:
        text: the raw string to clean.

    Returns:
        The cleaned string returned by `unidecode`.
    """
    escaped_bytes = text.encode("raw_unicode_escape")
    first_pass = escaped_bytes.decode("utf-8", errors="replace_decoding_with_cp1252")
    cp1252_bytes = first_pass.encode("cp1252", errors="replace_encoding_with_utf8")
    repaired = cp1252_bytes.decode("utf-8", errors="replace_decoding_with_cp1252")
    return unidecode(repaired)

def read_text_files(example, data_dir):
    """Load the essay text for one example and store it under ``"text"``.

    Args:
        example: mapping with an ``"essay_id"`` entry; it is modified in place.
        data_dir: base directory (supports the ``/`` path operator) that
            contains a ``train`` folder with one ``<essay_id>.txt`` per essay.

    Returns:
        The same ``example`` with the normalized essay text added.
    """
    essay_path = data_dir / "train" / f"{example['essay_id']}.txt"

    with open(essay_path, "r") as handle:
        raw_text = handle.read()

    example["text"] = resolve_encodings_and_normalize(raw_text)
    return example

# Seed the random number generators via transformers' `set_seed`, using the
# same seed that is passed to the Trainer.
set_seed(cfg["trainingargs"]["seed"])

# Keep cell output readable: ignore Python warnings and disable log records at
# WARNING level and below.
warnings.simplefilter('ignore')
logging.disable(logging.WARNING)

In [21]:
# Precomputed fold assignment, one row per essay.
essay_folds = pd.read_csv('../input/feedback-folds/df_folds.csv')

# essay_id -> fold index lookup consumed by `add_fold`.
# (The former mid-cell `essay_folds.head()` was removed: only the last
# expression of a cell is displayed, so it had no effect.)
essay_folds_dict = dict(
    zip(
        essay_folds.essay_id.values.tolist(),
        essay_folds.fold_k_5_seed_42.values.tolist(),
    )
)

In [22]:
# Build the training frame, or reload a previously tokenized dataset.
#
# Cached branch: defines `ds` and `grouped` (but not `train_df`).
# Fresh branch: defines `train_df` with one row per discourse element plus the
# full essay text in a `text` column; tokenization happens in a later cell.
data_dir = Path(cfg["data_dir"])

if cfg["load_from_disk"]:
    # Accept the path with or without the ".dataset" suffix. Note this
    # rewrites the value stored in `cfg`.
    if not cfg["load_from_disk"].endswith(".dataset"):
        cfg["load_from_disk"] += ".dataset"
    ds = load_from_disk(cfg["load_from_disk"])
    
    # The grouped dataframe is stored next to the dataset as "<prefix>_pkl".
    pkl_file = f"{cfg['load_from_disk'][:-len('.dataset')]}_pkl"
    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # only load files that this pipeline wrote itself.
    with open(pkl_file, "rb") as fp: 
        grouped = pickle.load(fp)
        
    print("loading from saved files")
else:
    train_df = pd.read_csv(data_dir / "train.csv")
    
    # Drop one specific discourse row by id (the reason is not recorded here).
    train_df = train_df[train_df.discourse_id != '56744a66949a'].reset_index(drop=True)
    
    # Debug runs work on a 100-row sample.
    if DEBUG: train_df = train_df.sample(n=100).reset_index(drop=True)
    
    # One dataset row per unique essay, so each essay file is read only once.
    text_ds = Dataset.from_dict({"essay_id": train_df.essay_id.unique()})
    
    text_ds = text_ds.map(
        partial(read_text_files, data_dir=data_dir),
        num_proc=cfg["num_proc"],
        batched=False,
        desc="Loading text files",
    )
    
    text_df = text_ds.to_pandas()
    
    # Apply the same normalization to the discourse snippets as to the essay
    # text, so the snippets can be located inside the essay later.
    train_df["discourse_text"] = [
        resolve_encodings_and_normalize(x) for x in train_df["discourse_text"]
    ]
    
    # Attach the full essay text to every discourse row.
    train_df = train_df.merge(text_df, on="essay_id", how="left")
    
# Discourse element types that get dedicated marker tokens.
disc_types = [
    "Claim", "Concluding Statement", "Counterclaim", "Evidence",
    "Lead", "Position", "Rebuttal",
]

# Marker tokens placed before ("[CLS_<TYPE>]") and after ("[END_<TYPE>]") each
# discourse element, keyed by discourse type.
cls_tokens_map = {name: f"[CLS_{name.upper()}]" for name in disc_types}
end_tokens_map = {name: f"[END_{name.upper()}]" for name in disc_types}

# Effectiveness rating -> class index.
label2id = dict(Adequate=0, Effective=1, Ineffective=2)

# Load the tokenizer that belongs to the base model and register the
# discourse marker tokens as additional special tokens.
tokenizer = AutoTokenizer.from_pretrained(cfg["model_name_or_path"])
tokenizer.add_special_tokens(
    {"additional_special_tokens": list(cls_tokens_map.values())+list(end_tokens_map.values())}
)

# Discourse type -> vocabulary id of its "[CLS_<TYPE>]" marker.
# Look the id up directly rather than via `tokenizer.encode(tkn)[1]`, which
# silently assumes the tokenizer prepends exactly one special token.
cls_id_map = {
    label: tokenizer.convert_tokens_to_ids(tkn)
    for label, tkn in cls_tokens_map.items()
}

Loading text files #0:   0%|                                                | 0/2096 [00:00<?, ?ex/s]
Loading text files #0:  11%|███▊                                | 221/2096 [00:00<00:00, 2200.90ex/s][A
Loading text files #0:  21%|███████▋                            | 449/2096 [00:00<00:00, 2246.22ex/s][A
Loading text files #0:  33%|███████████▋                        | 684/2096 [00:00<00:00, 2292.74ex/s][A
Loading text files #1:  33%|███████████▊                        | 687/2095 [00:00<00:00, 2306.74ex/s][A
Loading text files #0:  54%|██████████████████▉                | 1137/2096 [00:00<00:00, 2169.33ex/s][A
Loading text files #0:  65%|██████████████████████▉            | 1372/2096 [00:00<00:00, 2224.68ex/s][A
Loading text files #0:  77%|██████████████████████████▉        | 1610/2096 [00:00<00:00, 2274.28ex/s][A
Loading text files #0:  88%|██████████████████████████████▊    | 1847/2096 [00:00<00:00, 2300.27ex/s][A
Loading text files #0: 100%|██████████████████████████████

In [23]:
def find_positions(example):
    """Locate every discourse text inside its essay.

    Args:
        example: mapping where ``example["text"][0]`` is the essay string and
            ``example["discourse_text"]`` is the sequence of discourse
            snippets, in order.

    Returns:
        A list with one entry per snippet: ``[start, end]`` character offsets
        of the chosen occurrence, or ``[-1]`` when the snippet does not occur
        in the essay.
    """
    essay = example["text"][0]

    # Start offset of the most recently located snippet; used to pick between
    # several occurrences of the same snippet.
    search_from = 0

    spans = []

    for discourse in example["discourse_text"]:
        # Snippets are stripped before searching.
        needle = re.escape(discourse.strip())
        hits = [hit for hit in re.finditer(needle, essay)]

        if not hits:
            spans.append([-1])  # filtered out later
            continue

        # Prefer the first occurrence starting at or after `search_from`;
        # when there is none, fall back to the last occurrence.
        # NOTE(review): `search_from` tracks the previous *start*, not the
        # previous end, so a repeated snippet can resolve to the same span
        # twice -- confirm this is intended.
        chosen = next(
            (hit for hit in hits if hit.start() >= search_from),
            hits[-1],
        )

        spans.append([chosen.start(), chosen.end()])
        search_from = chosen.start()

    return spans

def tokenize(example):
    """Build the marker-annotated essay, tokenize it and align the labels.

    Each located discourse element is wrapped as
    ``[CLS_<TYPE>] <text> [END_<TYPE>]``; text between elements is kept as is.
    Labels are attached only at the positions of the ``[CLS_<TYPE>]`` tokens;
    every other position gets -100.

    Args:
        example: one essay-level row with list-valued ``"text"``,
            ``"discourse_text"``, ``"discourse_type"`` and
            ``"discourse_effectiveness"`` entries. An ``"idxs"`` entry is
            added to it in place.

    Returns:
        The tokenizer output with an extra ``"labels"`` list that has the same
        length as ``"input_ids"``.
    """
    # Stored on the example so the located spans are kept alongside it.
    example["idxs"] = find_positions(example)

    # Swapping "\n" for "|" keeps the string length, so the spans stay valid.
    essay = example["text"][0].replace('\n', '|')

    pieces = []
    span_labels = []
    cursor = 0  # end offset of the last discourse element already emitted

    rows = zip(
        example["idxs"],
        example["discourse_type"],
        example["discourse_effectiveness"],
    )
    for span, disc_type, disc_effect in rows:
        # discourse text that could not be located in the essay
        if span == [-1]:
            continue

        start, end = span

        # Emit whatever lies between the previous element and this one.
        if start != cursor:
            pieces.append(essay[cursor:start])

        pieces.extend(
            [cls_tokens_map[disc_type], essay[start:end], end_tokens_map[disc_type]]
        )
        cursor = end

        span_labels.append(label2id[disc_effect])

    tokenized = tokenizer(
        " ".join(pieces),
        padding=False,
        truncation=True,
        max_length=cfg["max_length"],
        add_special_tokens=True,
    )

    # Expand the per-element labels to one entry per token: the label at each
    # CLS marker position, -100 everywhere else so the loss ignores it.
    marker_ids = set(cls_id_map.values())
    next_label = 0
    token_labels = []
    for token_id in tokenized["input_ids"]:
        if token_id in marker_ids:
            token_labels.append(span_labels[next_label])
            next_label += 1
        else:
            token_labels.append(-100)

    tokenized["labels"] = token_labels

    return tokenized

In [24]:
def add_fold(example):
    """Attach the precomputed fold index of the example's essay.

    Args:
        example: mapping with an ``"essay_id"`` entry; modified in place.

    Returns:
        The same ``example`` with a ``"fold"`` entry looked up from
        ``essay_folds_dict``.
    """
    essay_id = example["essay_id"]
    example["fold"] = essay_folds_dict[essay_id]
    return example

In [25]:
# Tokenizing is only needed when no cached dataset was requested: with
# `cfg["load_from_disk"]` set, `ds` and `grouped` were already loaded by the
# data-loading cell and this cell is a no-op.
if cfg["load_from_disk"] is None:

    # One row per essay; every other column becomes a list with one item per
    # discourse element of that essay.
    grouped = train_df.groupby(["essay_id"]).agg(list)

    ds = Dataset.from_pandas(grouped).map(
        tokenize,
        batched=False,
        num_proc=cfg["num_proc"],
        desc="Tokenizing",
    )

    # Persist both artefacts next to the experiment's output directory so a
    # later run can point `load_from_disk` at them.
    save_dir = f"{cfg['trainingargs']['output_dir']}"
    dataset_path = f"{save_dir}.dataset"
    pickle_path = f"{save_dir}_pkl"

    ds.save_to_disk(dataset_path)
    with open(pickle_path, "wb") as fp:
        pickle.dump(grouped, fp)

    print("Saving dataset to disk:", cfg['trainingargs']['output_dir'])


Tokenizing #0:   5%|██                                            | 95/2096 [00:00<00:15, 132.72ex/s]
Tokenizing #0:   5%|██▎                                          | 110/2096 [00:00<00:14, 135.74ex/s][A
Tokenizing #0:   6%|██▋                                          | 125/2096 [00:00<00:14, 138.53ex/s][A
Tokenizing #0:   8%|███▌                                         | 167/2096 [00:01<00:11, 171.78ex/s][A
Tokenizing #0:   9%|███▉                                         | 186/2096 [00:01<00:10, 174.13ex/s][A
Tokenizing #0:  10%|████▍                                        | 204/2096 [00:01<00:10, 172.25ex/s][A
Tokenizing #0:  11%|████▊                                        | 226/2096 [00:01<00:10, 185.15ex/s][A
Tokenizing #0:  12%|█████▎                                       | 247/2096 [00:01<00:09, 192.03ex/s][A
Tokenizing #0:  13%|█████▋                                       | 267/2096 [00:01<00:10, 179.27ex/s][A
Tokenizing #0:  14%|██████▏                               

Saving dataset to disk: ../output/HF-43


In [26]:
# Add a `fold` column to every essay row via the essay_id -> fold lookup.
ds = ds.map(add_fold)

100%|███████████████████████████████████████████████████████████| 4191/4191 [00:05<00:00, 813.38ex/s]


In [27]:
# Sanity check: for every essay, the number of real labels and the number of
# CLS marker tokens must both equal the number of discourse texts. Essays that
# violate this are collected for inspection.
# The zip relies on `ds` and `grouped` having the same row order.
bad_matches = []
cls_ids = set(cls_id_map.values())

essay_rows = zip(ds["essay_id"], ds["labels"], ds["input_ids"], grouped.discourse_text)
for essay_id, label_seq, token_ids, discourse_texts in essay_rows:

    labelled_positions = sum(1 for label in label_seq if label != -100)
    marker_tokens = sum(1 for token_id in token_ids if token_id in cls_ids)
    expected = len(discourse_texts)

    if not (labelled_positions == expected and marker_tokens == expected):
        bad_matches.append((essay_id, label_seq, token_ids, discourse_texts))

print("Num bad matches", len(bad_matches))

Num bad matches 0


In [28]:
# Display the dataset summary (column names and row count) after tokenizing
# and adding folds.
ds

Dataset({
    features: ['discourse_id', 'discourse_text', 'discourse_type', 'discourse_effectiveness', 'text', 'essay_id', 'idxs', 'input_ids', 'token_type_ids', 'attention_mask', 'labels', 'fold'],
    num_rows: 4191
})

In [29]:
# Preview the metadata columns as a dataframe; everything not listed in
# `keep_df` is dropped before converting.
keep_df = {
    "discourse_id",
    "essay_id",
    "discourse_text",
    "discourse_type",
    "discourse_effectiveness",
    "labels",
    "fold",
}
columns_to_drop = [name for name in ds.column_names if name not in keep_df]
test_df = ds.remove_columns(columns_to_drop).to_pandas()
test_df.head()

Unnamed: 0,discourse_id,discourse_text,discourse_type,discourse_effectiveness,essay_id,labels,fold
0,"[fe6dfbd53216, ca9e1b60c9fb, 6cf2157f4f19, d92...",[Driverless cars are exaclty what you would ex...,"[Lead, Position, Claim, Evidence, Claim, Evide...","[Adequate, Effective, Effective, Effective, Ef...",00066EA9880D,"[-100, 0, -100, -100, -100, -100, -100, -100, ...",2
1,"[695d181861a1, cd97ee1cc0ad, 1b775274990b, 567...","[I am arguing against the policy change , even...","[Position, Counterclaim, Rebuttal, Evidence, C...","[Adequate, Adequate, Adequate, Adequate, Adequ...",000E6DE9E817,"[-100, -100, -100, -100, -100, -100, 0, -100, ...",2
2,"[89304284cef1, 4f2e871a4908, a885c3aa214b, 953...",[I think that students would benefit from lear...,"[Position, Claim, Claim, Claim, Claim, Evidenc...","[Adequate, Adequate, Adequate, Adequate, Adequ...",0016926B079C,"[-100, 0, -100, -100, -100, -100, -100, -100, ...",3
3,"[a713d0f6dc68, 2fd9bb2bfedf, 0e5ecdf1516e, 499...",[It is every student's dream to be able to lou...,"[Lead, Position, Evidence, Counterclaim, Rebut...","[Effective, Effective, Effective, Adequate, Ef...",00203C45FC55,"[-100, 1, -100, -100, -100, -100, -100, -100, ...",3
4,"[1082de1aa198, e425994b2124, bf086f9911f6, 29c...",[I heard you are considering changing the scho...,"[Lead, Position, Claim, Evidence, Counterclaim...","[Adequate, Effective, Ineffective, Adequate, A...",0029F4D19C3F,"[-100, -100, -100, -100, -100, -100, 0, -100, ...",3


In [30]:
import gc
import torch
from torch.utils.checkpoint import checkpoint
import numpy as np
from tqdm import tqdm
from transformers import Trainer, TrainingArguments, AutoConfig, AutoModelForTokenClassification, DataCollatorForTokenClassification
import sklearn



# Out-of-fold inference: for every fold, load that fold's best checkpoint,
# predict on the essays held out for that fold, and collect one row per
# discourse element (logits, probabilities, label, per-row log loss).
args = TrainingArguments(**cfg["trainingargs"])

# if using longformer pad to multiple of 512
# for others pad to multiple of 8

collator = DataCollatorForTokenClassification(
    tokenizer=tokenizer, pad_to_multiple_of=cfg["pad_multiple"], padding=True
)

# Base output directory; each fold gets its own "-fold{n}" suffix below.
output = args.output_dir

# Only these columns are passed to the model at prediction time.
keep_cols = {"input_ids", "attention_mask", "labels"}

fold_dfs = []

for fold in range(cfg["k_folds"]):

    args.output_dir = f"{output}-fold{fold}"

    model_config = AutoConfig.from_pretrained(
        cfg["model_name_or_path"],
    )
    model_config.update(
        {
            "num_labels": 3,
            "cls_tokens": list(cls_id_map.values()),
            "label2id": label2id,
            "id2label": {v:k for k, v in label2id.items()},
        }
    )

    model = AutoModelForTokenClassification.from_pretrained(cfg["model_name_or_path"], config=model_config)

    # need to resize embeddings because of added tokens
    model.resize_token_embeddings(len(tokenizer))

    PATH = f'{best_checkpoints[fold]}/pytorch_model.bin'

    # Load the weights onto the CPU first; `load_state_dict` copies them into
    # the model's own parameters, so no second copy is placed on the GPU.
    model.load_state_dict(torch.load(PATH, map_location="cpu"))

    # Select the held-out essays once, then derive the model inputs from that
    # selection (previously the same filter ran twice per fold).
    full_eval = ds.filter(lambda example: example["fold"] == fold)
    eval_dataset = full_eval.remove_columns(
        [c for c in full_eval.column_names if c not in keep_cols]
    )

    trainer = Trainer(
        model=model,
        args=args,
        tokenizer=tokenizer,
        data_collator=collator,
    )

    preds = trainer.predict(eval_dataset)
    preds_torch = torch.tensor(preds.predictions, dtype=torch.float32)

    all_preds = []
    all_logits = []
    all_labels = []

    for i in tqdm(range(len(eval_dataset))):
        row_labels = np.array(eval_dataset[i]['labels'])
        # True at the positions that carry a real label, i.e. not -100.
        indices = row_labels != -100
        mylabls = torch.tensor(row_labels)[indices]
        # Predictions are padded to a common length; cut back to this row's
        # length before selecting the labelled positions.
        mylogits = preds_torch[i][:len(indices),:][indices]
        mypreds = torch.nn.functional.softmax(mylogits, dim=-1)
        all_preds.append(mypreds)
        all_logits.append(mylogits)
        all_labels.append(mylabls)

    all_preds = torch.cat(all_preds, dim=0).numpy()
    all_logits = torch.cat(all_logits, dim=0).numpy()
    all_labels = torch.cat(all_labels, dim=0).numpy()

    # Flatten the per-essay lists so each row is one discourse element. The
    # column assignments raise if the flattened metadata and the collected
    # predictions differ in length.
    df = pd.DataFrame()
    df['discourse_id'] = [x for z in full_eval['discourse_id'] for x in z]
    df['preds'] = [x for x in all_preds]
    # Logit columns follow label2id: 0=Adequate, 1=Effective, 2=Ineffective.
    df['Ineffective'] = all_logits[:,2]
    df['Adequate'] = all_logits[:,0]
    df['Effective'] = all_logits[:,1]
    df['labels'] = all_labels
    df['discourse_type'] = [x for z in full_eval['discourse_type'] for x in z]
    df['discourse_effectiveness'] = [x for z in full_eval['discourse_effectiveness'] for x in z]
    df['discourse_text'] = [x for z in full_eval['discourse_text'] for x in z]
    # Per-row log loss, used later to inspect the hardest examples.
    df['loss'] = [sklearn.metrics.log_loss(np.expand_dims(np.array(x), 0), np.expand_dims(y, 0), labels=[0,1,2]) for x,y in zip(df.labels.values, np.stack(df.preds.values))]

    fold_dfs.append(df)

    # The trainer holds a reference to the model, so both must be dropped for
    # the memory to be reclaimed before the next fold is loaded.
    del model, trainer
    gc.collect()
    torch.cuda.empty_cache()

full_df = pd.concat(fold_dfs).reset_index(drop=True)
full_df.head()

100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.51ba/s]


100%|█████████████████████████████████████████████████████████████| 838/838 [00:01<00:00, 730.84it/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.55ba/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.55ba/s]


100%|█████████████████████████████████████████████████████████████| 834/834 [00:01<00:00, 719.12it/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.49ba/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.52ba/s]


100%|█████████████████████████████████████████████████████████████| 843/843 [00:01<00:00, 728.98it/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.50ba/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.54ba/s]


100%|█████████████████████████████████████████████████████████████| 838/838 [00:01<00:00, 716.65it/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.47ba/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.54ba/s]


100%|█████████████████████████████████████████████████████████████| 838/838 [00:01<00:00, 712.88it/s]
100%|██████████████████████████████████████████████████████████████████| 5/5 [00:03<00:00,  1.48ba/s]


Unnamed: 0,discourse_id,preds,Ineffective,Adequate,Effective,labels,discourse_type,discourse_effectiveness,discourse_text,loss
0,4bbb368a6ffd,"[0.046331882, 0.95190096, 0.0017672037]",-3.269531,-0.003099,3.019531,1,Lead,Effective,In life all of us suffer many trials and obsta...,0.049294
1,d4bb753babd0,"[0.07228731, 0.92419904, 0.0035136726]",-2.927734,0.096252,2.644531,1,Claim,Effective,"it could help you explore different mindsets,",0.078828
2,62ececba9b36,"[0.04758436, 0.9457763, 0.0066394005]",-2.382812,-0.41333,2.576172,1,Claim,Effective,"get an outside unbiased opinion,",0.055749
3,4a70f8078d80,"[0.047189448, 0.9462069, 0.006603617]",-2.337891,-0.371338,2.626953,1,Claim,Effective,give you a chance to express and organize your...,0.055294
4,60861279dee4,"[0.06992228, 0.92849594, 0.0015818041]",-3.326172,0.462646,3.048828,1,Evidence,Effective,Talking to someone to get an outside opinion c...,0.074189


In [31]:
# Overall out-of-fold log loss across all folds, computed from the softmax
# probabilities in `preds` against the integer labels.
sklearn.metrics.log_loss(full_df.labels.values, np.stack(full_df.preds.values), labels=[0,1,2])

0.5768097096237481

In [32]:
# Display the label -> class index mapping, for reading the `labels` column
# and the order of the entries in `preds`.
label2id

{'Adequate': 0, 'Effective': 1, 'Ineffective': 2}

In [33]:
# Show the full discourse text of one row, selected by a hard-coded row label
# (18416) in `full_df`.
full_df.discourse_text.loc[18416]

'Venus is a worthy planet because it does not have all of man kind on it destroying it or usig it. Venus is a place where some people go to see outisde of our world to see what space really does look like. '

In [34]:
# Rows ordered from highest to lowest per-row log loss, i.e. the worst
# predictions come first.
full_df.sort_values('loss', ascending=False)

Unnamed: 0,discourse_id,preds,Ineffective,Adequate,Effective,labels,discourse_type,discourse_effectiveness,discourse_text,loss
10906,749e46ad80ae,"[0.050953917, 0.9475648, 0.0014813559]",-3.335938,0.202026,3.125000,2,Evidence,Ineffective,"Most people are too busy, or lazy, or just don...",6.514798
9006,92da3a4535b2,"[0.42380732, 0.001985752, 0.57420695]",1.961914,1.658203,-3.705078,1,Claim,Effective,Its easy to do because you don't have to count...,6.221757
10910,81a1eb3bf903,"[0.059211794, 0.9385202, 0.002268035]",-3.177734,0.084473,2.847656,2,Evidence,Ineffective,The majority does not follow the consitiution ...,6.088841
18416,5a82a0a5e324,"[0.49025536, 0.002442858, 0.5073018]",1.398438,1.364258,-3.937500,1,Position,Effective,Venus is a worthy planet because it does not h...,6.014586
24215,06c127fdd675,"[0.4468406, 0.0029990503, 0.55016035]",1.352539,1.144531,-3.859375,1,Claim,Effective,it not only benifits congress it aso benifits ...,5.809460
...,...,...,...,...,...,...,...,...,...,...
27095,6ff6393814d8,"[0.005615818, 0.00026536238, 0.9941188]",4.230469,-0.945801,-3.998047,2,Evidence,Ineffective,Luke Bomberger was join to the program because...,0.005899
29322,c054a9fdc7fb,"[0.005423366, 0.00017373897, 0.99440295]",4.378906,-0.832520,-4.273438,2,Evidence,Ineffective,"In this article i read about ""Making Mona Lisa...",0.005613
33707,511f05b41c6b,"[0.0052015893, 0.00015699773, 0.9946414]",4.335938,-0.917480,-4.417969,2,Evidence,Ineffective,One day in the lab two people were arguing ove...,0.005373
24077,88cc2dc55b33,"[0.99468404, 0.0027721205, 0.002543841]",-2.287109,3.681641,-2.201172,0,Rebuttal,Adequate,what happens if our family have a emergency an...,0.005330


In [35]:
# Mean per-row log loss for each true effectiveness class.
full_df.groupby('discourse_effectiveness')['loss'].mean()

discourse_effectiveness
Adequate       0.405714
Effective      0.593462
Ineffective    1.108270
Name: loss, dtype: float64

In [36]:
# Mean per-row log loss for each discourse type.
full_df.groupby('discourse_type')['loss'].mean()

discourse_type
Claim                   0.582025
Concluding Statement    0.529020
Counterclaim            0.584526
Evidence                0.592039
Lead                    0.600233
Position                0.506424
Rebuttal                0.680698
Name: loss, dtype: float64

In [37]:
# Mean per-row log loss for every (discourse type, effectiveness class) pair.
full_df.groupby(['discourse_type', 'discourse_effectiveness'])['loss'].mean()

discourse_type        discourse_effectiveness
Claim                 Adequate                   0.380930
                      Effective                  0.599008
                      Ineffective                1.510394
Concluding Statement  Adequate                   0.392705
                      Effective                  0.498478
                      Ineffective                1.028728
Counterclaim          Adequate                   0.334127
                      Effective                  0.796743
                      Ineffective                1.556486
Evidence              Adequate                   0.501203
                      Effective                  0.492898
                      Ineffective                0.857284
Lead                  Adequate                   0.445378
                      Effective                  0.563905
                      Ineffective                1.197625
Position              Adequate                   0.262876
                      Effe

In [38]:
# Display the experiment name that the output file name below is built from.
exp_name

'HF-43'

In [39]:
# Write the out-of-fold predictions to "<exp_name>-OOF.csv" without the index.
# NOTE(review): the `preds` column holds one array per row and will be written
# as its text representation -- confirm downstream readers parse it, or have
# them use the separate logit columns instead.
full_df.to_csv(f'../output/{exp_name}-OOF.csv', index=False)