# Initialization

In [1]:
# !pip install datasets
# !pip install accelerate -U

In [2]:
import json
import random
from os import path

import numpy as np
import pandas as pd
import requests
import torch

from datasets import Dataset
from tqdm import tqdm
from google.colab import drive
from copy import deepcopy

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

In [3]:
# Single seed reused for Python's `random`, the train/eval split and the
# DataFrame sampling done later in this notebook.
RANDOM_STATE = 1403

In [4]:
# Seed every random number generator the notebook relies on, not just
# Python's: NumPy and torch are seeded too, so weights that are initialised
# randomly when a model is loaded come out the same after a kernel restart.
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

# Registers `progress_apply` on pandas objects (used by evaluate_model).
tqdm.pandas()

In [5]:
# Directory layout of the project. Every prefix ends with a slash because
# file names are appended to it by plain string concatenation.
PROJECT_PATH = ''
SUBMISSION_PATH = f'{PROJECT_PATH}submission/'

DATA_PATH = f'{PROJECT_PATH}data/'
CHALLENGE_DATA_PATH = f'{DATA_PATH}challenge/'

OUTPUT_PATH = f'{PROJECT_PATH}output/'
CHALLENGE_OUTPUT_PATH = f'{OUTPUT_PATH}challenge/'

MODEL_PATH = f'{PROJECT_PATH}model/'
CHALLENGE_MODEL_PATH = f'{MODEL_PATH}challenge/'

LOG_PATH = f'{PROJECT_PATH}log/'
CHALLENGE_LOG_PATH = f'{LOG_PATH}challenge/'

In [None]:
# Mount Google Drive unless the mount point is already present.
drive_already_mounted = path.exists('/content/drive')
if not drive_already_mounted:
    drive.mount('/content/drive')

In [7]:
def pprint(obj, level=-1):
    """Print ``obj`` one item per line, indenting nested sequences.

    A list or NumPy array is rendered as a ``[`` ... ``]`` block whose
    elements sit one level deeper; any other value is printed via ``str``.
    Each level adds two spaces of indentation (a negative level adds none).
    """
    is_sequence = isinstance(obj, (list, np.ndarray))
    if not is_sequence:
        print(level * '  ' + str(obj))
        return

    pprint('[', level)
    inner_level = level + 1
    for item in obj:
        pprint(item, inner_level)
    pprint(']', level)

# Generate Full Training Set and Test Set

## Load train dataset

In [8]:
# Positive (parent, child) pairs of the GeoNames sub-task, fetched from GitHub.
data_url_true = "https://raw.githubusercontent.com/HamedBabaei/" + \
      "LLMs4OL-Challenge-ISWC2024/main/TaskB-Taxonomy%20Discovery/" + \
      "SubTask%20B.1(FS)%20-%20GeoNames/geoname_train_pairs.json"

# Fail fast: without a timeout the cell can hang indefinitely, and without
# the status check an HTTP error page would be handed to the JSON parser.
response_true = requests.get(data_url_true, timeout=30)
response_true.raise_for_status()
data_true = response_true.json()

In [9]:
# Every downloaded pair is a positive example, so all rows get label=True.
df_true = pd.DataFrame(data_true).assign(label=True)

df_true

Unnamed: 0,parent,child,label
0,"mountain, hill, rock",karst area,True
1,"spot, building, farm",dike,True
2,"forest, heath",forest(s),True
3,"stream, lake",wells,True
4,"stream, lake",salt pond,True
...,...,...,...
471,"spot, building, farm",triangulation station,True
472,"stream, lake",tidal flat(s),True
473,"spot, building, farm",asylum,True
474,"stream, lake",icecap ridge,True


In [10]:
# Number of positive pairs per parent category.
df_true['parent'].value_counts()

Unnamed: 0_level_0,count
parent,Unnamed: 1_level_1
"spot, building, farm",180
"stream, lake",99
"mountain, hill, rock",59
undersea,49
"parks, area",35
"forest, heath",15
"road, railroad",14
"country, state, region",13
"city, village",12


In [11]:
# Distinct parent categories; later cells use this array as the candidate
# set for negative sampling and for parent prediction.
parents = df_true['parent'].unique()

print("List of parents:")
pprint(parents, 0)

print(f"\nThe number of unique parents: {len(parents)}")

List of parents:
[
  mountain, hill, rock
  spot, building, farm
  forest, heath
  stream, lake
  undersea
  parks, area
  city, village
  country, state, region
  road, railroad
]

The number of unique parents: 9


## Separate train and eval sets

In [None]:
# Hold out 76 positive pairs for evaluation, stratified by parent category;
# both parts get a fresh 0..N-1 index.
df_true_train, df_true_eval = (
    part.reset_index(drop=True)
    for part in train_test_split(
        df_true,
        test_size=76,
        stratify=df_true['parent'],
        random_state=RANDOM_STATE,
    )
)

df_true_eval

Unnamed: 0,parent,child,label
0,"spot, building, farm",agricultural school,True
1,"stream, lake",wadi,True
2,"stream, lake",sound,True
3,"spot, building, farm",facility center,True
4,"forest, heath",orchard(s),True
...,...,...,...
71,"stream, lake",icecap depression,True
72,"stream, lake",abandoned canal,True
73,"stream, lake",lake,True
74,"spot, building, farm",spillway,True


## Generate negative samples

### Reversed dataset

In [None]:
def reverse_df(df):
    """Build negative samples by swapping the two ends of every pair.

    Returns a new frame in which ``parent`` holds the original child,
    ``child`` holds the original parent and ``label`` is ``False``.
    The input frame is not modified.
    """
    return df.assign(
        parent=df['child'],
        child=df['parent'],
        label=False,
    )

In [None]:
# Reversed-pair negatives for the train and eval splits.
df_reversed_train, df_reversed_eval = (
    reverse_df(split) for split in (df_true_train, df_true_eval)
)

df_reversed_eval

Unnamed: 0,parent,child,label
0,agricultural school,"spot, building, farm",False
1,wadi,"stream, lake",False
2,sound,"stream, lake",False
3,facility center,"spot, building, farm",False
4,orchard(s),"forest, heath",False
...,...,...,...
71,icecap depression,"stream, lake",False
72,abandoned canal,"stream, lake",False
73,lake,"stream, lake",False
74,spillway,"spot, building, farm",False


### Manipulated dataset

In [None]:
def manipulate_parent(parent):
    """Return a randomly chosen parent category other than ``parent``.

    Candidates are taken from the notebook-level ``parents`` collection and
    the pick is drawn from Python's global ``random`` generator.
    """
    candidates = []
    for candidate in parents:
        if candidate != parent:
            candidates.append(candidate)
    return random.choice(candidates)

def manipulate_df(df):
    """Build negative samples by giving every child a wrong parent.

    Returns a new frame whose ``parent`` values were replaced through
    ``manipulate_parent`` and whose ``label`` is ``False``. The input frame
    is not modified.
    """
    wrong_parents = df['parent'].apply(manipulate_parent)
    return df.assign(parent=wrong_parents, label=False)

In [None]:
# Wrong-parent negatives for the train and eval splits (train is drawn first).
df_manipulated_train, df_manipulated_eval = (
    manipulate_df(split) for split in (df_true_train, df_true_eval)
)

df_manipulated_eval

Unnamed: 0,parent,child,label
0,"country, state, region",agricultural school,False
1,"mountain, hill, rock",wadi,False
2,"forest, heath",sound,False
3,"parks, area",facility center,False
4,"stream, lake",orchard(s),False
...,...,...,...
71,"road, railroad",icecap depression,False
72,"parks, area",abandoned canal,False
73,"parks, area",lake,False
74,"stream, lake",spillway,False


## Add all together

In [None]:
def make_up_df(df1, df2, df3, n_df1, n_df2, n_df3):
    """Stack fixed-size random samples of three frames into one frame.

    ``n_dfK`` rows are sampled from ``dfK``, each draw seeded with
    ``RANDOM_STATE``; the samples are concatenated in argument order and
    the result gets a fresh 0..N-1 index.
    """
    sampled_parts = [
        frame.sample(size, random_state=RANDOM_STATE)
        for frame, size in ((df1, n_df1), (df2, n_df2), (df3, n_df3))
    ]
    return pd.concat(sampled_parts, ignore_index=True)

In [None]:
def create_final_df(df_true, df_reversed, df_manipulated):
    """Combine positives and negatives into one balanced data set.

    All positive rows are kept. The same number of negatives is added:
    one third (rounded down) from ``df_reversed`` and the remainder from
    ``df_manipulated``. Sampling and stacking are delegated to
    ``make_up_df``.
    """
    total_positives = len(df_true)
    reversed_quota = total_positives // 3
    manipulated_quota = total_positives - reversed_quota

    return make_up_df(
        df_true,
        df_reversed,
        df_manipulated,
        total_positives,
        reversed_quota,
        manipulated_quota,
    )

In [None]:
# Assemble the final train and eval sets: positives plus reversed and
# wrong-parent negatives, built separately for each split.
df_train = create_final_df(df_true_train, df_reversed_train, df_manipulated_train)
df_eval = create_final_df(df_true_eval, df_reversed_eval, df_manipulated_eval)

In [None]:
# Preview of the assembled training set.
df_train

Unnamed: 0,parent,child,label
0,"spot, building, farm",coal mine(s),True
1,undersea,deep,True
2,"spot, building, farm",free trade zone,True
3,"spot, building, farm",leprosarium,True
4,"forest, heath",tree(s),True
...,...,...,...
795,"country, state, region",fissure,False
796,"spot, building, farm",sandy desert,False
797,"forest, heath",administrative division,False
798,"parks, area",coconut grove,False


In [None]:
# Preview of the assembled evaluation set.
df_eval

Unnamed: 0,parent,child,label
0,"stream, lake",abandoned canal,True
1,"stream, lake",sulphur spring(s),True
2,"parks, area",region,True
3,undersea,seachannel,True
4,"parks, area",amusement park,True
...,...,...,...
147,"spot, building, farm",marsh(es),False
148,"road, railroad",abandoned mission,False
149,"country, state, region",munitions plant,False
150,"country, state, region",agricultural school,False


In [None]:
# df_train.to_csv(CHALLENGE_DATA_PATH + 'geonames-generated-df-train-400.csv', index=False)
# df_eval.to_csv(CHALLENGE_DATA_PATH + 'geonames-generated-df-eval-76.csv', index=False)

# Handling dfs

In [12]:
# Reload the generated train/eval sets from disk, replacing any in-memory
# `df_train` / `df_eval`.
# NOTE(review): the only code that writes these two files is the commented-out
# `to_csv` cell just above, so on a fresh environment this cell raises
# FileNotFoundError until those lines have been run once.
df_train = pd.read_csv(CHALLENGE_DATA_PATH + 'geonames-generated-df-train-400.csv')
df_eval = pd.read_csv(CHALLENGE_DATA_PATH + 'geonames-generated-df-eval-76.csv')

In [13]:
# Work on copies: finetune_and_save_model adds prompt and prediction columns
# to the frames it receives, and the loaded frames should stay untouched.
train_df = df_train.copy()
eval_df = df_eval.copy()

# Fine-tune

## Functions

In [14]:
def generate_prompt(parent, child, index, mask='[MASK]'):
    """Render one of the nine taxonomy statements for a (parent, child) pair.

    Args:
        parent: Text of the candidate superclass.
        child: Text of the candidate subclass.
        index: 1-based template number (1..9). Templates 1, 3, 5 and 7 state
            the relation from the parent's side, 2, 4, 6 and 8 from the
            child's side; template 9 is template 1 with both terms quoted.
        mask: Text placed where the truth value of the statement goes.

    Returns:
        The formatted prompt string.

    Raises:
        IndexError: If ``index`` is not in 1..9. Without this check 0 and
            negative values would silently select a template counted from
            the end of the list.
    """
    prompts = [
        f"{parent} is the superclass of {child}. This statement is {mask}.",
        f"{child} is a subclass of {parent}. This statement is {mask}.",
        f"{parent} is the parent class of {child}. This statement is {mask}.",
        f"{child} is a child class of {parent}. This statement is {mask}.",
        f"{parent} is a supertype of {child}. This statement is {mask}.",
        f"{child} is a subtype of {parent}. This statement is {mask}.",
        f"{parent} is an ancestor class of {child}. This statement is {mask}.",
        # Fixed: this template used {child} on both sides (the parent never
        # appeared) and misspelled "class". Checkpoints fine-tuned with the
        # old wording were trained on a different string for template 8.
        f"{child} is a descendant class of {parent}. This statement is {mask}.",
        f'"{parent}" is the superclass of "{child}". This statement is {mask}.',
    ]

    if not 1 <= index <= len(prompts):
        raise IndexError(
            f"prompt index must be between 1 and {len(prompts)}, got {index}"
        )

    return prompts[index - 1]

In [15]:
def tokenize_function(examples, column_name, tokenizer):
    """Tokenize one text column of a batch.

    The values of ``examples[column_name]`` are passed to ``tokenizer`` with
    ``padding='max_length'`` and ``truncation=True``; the tokenizer's result
    is returned unchanged.
    """
    texts = examples[column_name]
    tokenizer_options = {'padding': 'max_length', 'truncation': True}
    return tokenizer(texts, **tokenizer_options)

In [16]:
def init_tokenizer_and_model(model_name):
    """Load a BERT tokenizer and a two-label sequence classifier.

    Args:
        model_name: Model identifier or checkpoint path understood by
            ``from_pretrained``.

    Returns:
        ``(tokenizer, model, device)``; the model has been moved to
        ``device``, which is CUDA when available and the CPU otherwise.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    tokenizer = BertTokenizer.from_pretrained(model_name)
    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
    model.to(device)

    return tokenizer, model, device

In [17]:
def predict_masked_sentence(sentence, tokenizer, model, device):
    """Classify one prompt and return the predicted truth value.

    The sentence is tokenized, moved to ``device`` and run through ``model``
    without gradient tracking. The index of the largest logit is mapped to
    ``False`` (class 0) or ``True`` (class 1).
    """
    encoded = tokenizer(sentence, return_tensors='pt').to(device)

    with torch.no_grad():
        logits = model(**encoded).logits

    class_to_label = {0: False, 1: True}
    winning_class = torch.argmax(logits, dim=1).item()
    return class_to_label[winning_class]

In [18]:
def save_and_print_evals(df_train, df_eval, run_name, prompt_index):
    """Score one prompt's predictions on both splits and record the result.

    Accuracy, precision, recall and F1 (positive class ``True``) are computed
    from the ``label`` and ``prediction_prompt_<prompt_index>`` columns of
    each frame. The metrics are stored in the notebook-level
    ``evaluation_results`` dict under ``run_name`` (keys end in ``-trn`` for
    the training split and ``-evl`` for the evaluation split) and printed.
    """
    global evaluation_results

    prediction_column = f'prediction_prompt_{prompt_index}'

    metrics = {}
    for suffix, frame in (('trn', df_train), ('evl', df_eval)):
        y_true = frame['label']
        y_pred = frame[prediction_column]
        metrics[f'acc-{suffix}'] = accuracy_score(y_true, y_pred)
        metrics[f'prc-{suffix}'] = precision_score(y_true, y_pred, pos_label=True)
        metrics[f'rcl-{suffix}'] = recall_score(y_true, y_pred, pos_label=True)
        metrics[f'f1s-{suffix}'] = f1_score(y_true, y_pred, pos_label=True)

    evaluation_results[run_name] = metrics

    print(f"{run_name} - Evaluation on train and evaluation data results:")
    print(evaluation_results[run_name])
    print()

In [19]:
def finetune_and_save_model(run_name, model_name, training_args, train_df, eval_df, n_prompts, start_prompt=1):
    """Fine-tune one BERT classifier on a sequence of prompt templates.

    The same model object is trained on template ``start_prompt``, then on
    the next one, and so on up to ``n_prompts``. After each template the
    function evaluates the model, predicts every row of both frames, records
    the metrics in the notebook-level ``evaluation_results``, writes the
    metrics JSON and both result CSVs to ``CHALLENGE_OUTPUT_PATH`` and saves
    a checkpoint under ``CHALLENGE_MODEL_PATH + 'temp/'``.

    Args:
        run_name: Label used in metric keys, CSV names and checkpoint names.
        model_name: Model identifier or checkpoint path to start from.
        training_args: ``TrainingArguments`` reused for every template.
        train_df: Training frame with ``parent``, ``child`` and ``label``
            columns. Modified in place: ``masked_sentence_<k>`` and
            ``prediction_prompt_<k>`` columns are added for each template.
        eval_df: Evaluation frame; same columns, modified the same way.
        n_prompts: Number of the last template to train on (inclusive).
        start_prompt: Number of the first template to train on. The default
            of 1 keeps the original behaviour; a larger value lets a run
            that resumes from a checkpoint skip earlier templates without
            editing this function.

    Returns:
        ``(tokenizer, model)`` after the last template.

    Raises:
        ValueError: If ``start_prompt`` is smaller than 1.
    """
    import os  # local import: the notebook's import cell only exposes `os.path`

    if start_prompt < 1:
        raise ValueError(f"start_prompt must be >= 1, got {start_prompt}")

    # Create the results folder before any training starts, so a missing
    # directory cannot discard the outcome of a finished training run.
    if CHALLENGE_OUTPUT_PATH:
        os.makedirs(CHALLENGE_OUTPUT_PATH, exist_ok=True)

    tokenizer, model, device = init_tokenizer_and_model(model_name)

    for prompt_index in range(start_prompt, n_prompts + 1):
        ms_column_name = f'masked_sentence_{prompt_index}'
        prediction_column = f'prediction_prompt_{prompt_index}'

        # Render the current template for every row and tokenize it,
        # first for the training split, then for the evaluation split.
        tokenized = {}
        for split, frame in (('train', train_df), ('eval', eval_df)):
            frame[ms_column_name] = frame.apply(
                lambda row: generate_prompt(row['parent'], row['child'], prompt_index),
                axis=1
            )
            dataset = Dataset.from_pandas(frame[[ms_column_name, 'label']])
            tokenized[split] = dataset.map(
                lambda batch: tokenize_function(batch, ms_column_name, tokenizer),
                batched=True
            )

        # A new Trainer is built per template, but it keeps training the
        # same model object, so the weights accumulate across templates.
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=tokenized['train'],
            eval_dataset=tokenized['eval'],
        )

        print(f"Fine-tuning {model_name} with prompt-{prompt_index} ...")
        trainer.train()

        eval_results = trainer.evaluate(eval_dataset=tokenized['eval'])
        print(eval_results)

        # Row-by-row predictions with the freshly trained weights.
        for frame in (train_df, eval_df):
            frame[prediction_column] = frame[ms_column_name].apply(
                predict_masked_sentence,
                args=(tokenizer, model, device)
            )

        save_and_print_evals(train_df, eval_df, run_name, prompt_index)
        # Rewritten after every template so partial results survive a crash.
        with open(CHALLENGE_OUTPUT_PATH + 'geonames-binary-evaluation-results-bert.json', 'w') as f:
            json.dump(evaluation_results, f)

        train_df.to_csv(
            CHALLENGE_OUTPUT_PATH + \
            f'geonames-binary-train-results-{run_name}-prompt{prompt_index}.csv',
            index=False
        )
        eval_df.to_csv(
            CHALLENGE_OUTPUT_PATH + \
            f'geonames-binary-eval-results-{run_name}-prompt{prompt_index}.csv',
            index=False
        )

        # One checkpoint per template; the evaluation cells load these by name.
        checkpoint_dir = CHALLENGE_MODEL_PATH + 'temp/' + f'res-{run_name}-prompt-{prompt_index}-model/'
        model.save_pretrained(checkpoint_dir)
        tokenizer.save_pretrained(checkpoint_dir)

    # model.save_pretrained(CHALLENGE_MODEL_PATH + f'res-{run_name}-prompt-{prompt_index}-model/')
    # tokenizer.save_pretrained(CHALLENGE_MODEL_PATH + f'res-{run_name}-prompt-{prompt_index}-model/')

    return tokenizer, model

## Procedure

In [None]:
# Epochs per prompt template: finetune_and_save_model builds a new Trainer
# with these arguments for every template it trains on.
num_epochs = 1

training_args = TrainingArguments(
    output_dir='./results/',
    num_train_epochs=num_epochs,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # NOTE(review): with batch size 4 and one epoch, the warm-up is only
    # completed if the training set has at least 2000 rows; the training
    # frame previewed earlier in this notebook looks much smaller - confirm
    # that a warm-up this long is intended.
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir=CHALLENGE_LOG_PATH,
    logging_steps=50,
    # Evaluate and checkpoint once per epoch; both strategies are set to the
    # same value here, alongside load_best_model_at_end.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

In [None]:
# Metrics of every fine-tuning run, keyed by run name; save_and_print_evals
# writes into this dict.
evaluation_results = {}

# Uncomment to continue from the metrics stored by an earlier session:
# with open(CHALLENGE_OUTPUT_PATH + 'evaluation-results-geonames-(binary).json', 'r') as f:
#     evaluation_results = json.load(f)

In [None]:
# Run with a single template: fine-tune bert-large-uncased on prompt 1 only.
num_prompts = 1

tokenizer1, model1 = finetune_and_save_model(
    run_name='bert-ft-1epoch-1prompt-400-76',
    model_name='bert-large-uncased',
    training_args=training_args,
    train_df=train_df,
    eval_df=eval_df,
    n_prompts=num_prompts
)

In [None]:
# Fine-tune bert-large-uncased on templates 1..8 in sequence. A checkpoint is
# saved after each template; the "1 to 8 prompts" evaluation cells load them.
num_prompts = 8

tokenizer3, model3 = finetune_and_save_model(
    run_name='bert-ft-8epochs-8prompts-400-76-try1',
    model_name='bert-large-uncased',
    training_args=training_args,
    train_df=train_df,
    eval_df=eval_df,
    n_prompts=num_prompts
)

In [None]:
# Resume from the checkpoint saved after template 4 of the 8-prompt run.
# NOTE(review): the run name suggests that only templates 7 and 8 are trained
# here, but with num_prompts = 8 the loop in finetune_and_save_model, as
# written, goes through templates 1..8 - confirm the loop was adjusted when
# this run was produced.
num_prompts = 8

tokenizer4, model4 = finetune_and_save_model(
    run_name='bert-ft-6epochs-6prompts-7-8-from4-400-76-try1',
    model_name=CHALLENGE_MODEL_PATH + 'temp/' + 'res-bert-ft-8epochs-8prompts-400-76-try1-prompt-4-model',
    training_args=training_args,
    train_df=train_df,
    eval_df=eval_df,
    n_prompts=num_prompts
)

In [None]:
# Top-down run: resumes from the checkpoint saved after template 1 of the
# 8-prompt run. It depends on manual edits elsewhere in the notebook.
# To run this one, the following should be done first:
# 1- Change generate_prompt() so that it has only 5 prompts
# 2- Change the training loop over prompts so that it starts from 2
# 3- Change predict_parent() so that the highest possible level is 2, not 4

num_prompts = 5

tokenizer5, model5 = finetune_and_save_model(
    run_name='bert-ft-5epochs-5prompts-topdown-from1-400-76-try1',
    model_name=CHALLENGE_MODEL_PATH + 'temp/' + 'res-bert-ft-8epochs-8prompts-400-76-try1-prompt-1-model',
    training_args=training_args,
    train_df=train_df,
    eval_df=eval_df,
    n_prompts=num_prompts
)

In [None]:
# Persist the metrics collected for all runs of this session.
results_path = CHALLENGE_OUTPUT_PATH + 'evaluation-results-geonames-(binary).json'
with open(results_path, 'w') as f:
    json.dump(evaluation_results, f)

# Evaluation

## Functions

In [20]:
def predict_parent(child, parents_scores, tokenizer, model, device, level=0, max_level=4):
    """Pick a parent category for ``child`` by letting prompts vote.

    Every candidate in ``parents_scores`` earns one point per prompt the
    classifier judges true. Level 0 asks template 1; level ``L > 0`` asks
    templates ``2L`` and ``2L + 1``. If one candidate has the highest total
    it is returned. Otherwise only the tied candidates (with their totals)
    move on to the next level. When a tie survives ``max_level`` it is
    broken with ``random.choice`` and the notebook-level counter ``randoms``
    is incremented.

    Args:
        child: Text of the child term.
        parents_scores: Dict mapping each candidate parent to its running
            score. Updated in place.
        tokenizer, model, device: Passed through to
            ``predict_masked_sentence``.
        level: Current tie-break level; callers normally leave it at 0.
        max_level: Last level that is tried before falling back to a random
            pick. The default 4 uses templates 1..9; 2 stops after
            template 5.

    Returns:
        The chosen parent (a key of ``parents_scores``).
    """
    global randoms

    prompt_indices = [1] if level == 0 else [level * 2, level * 2 + 1]

    for parent in parents_scores:
        # Number of prompts at this level that were judged true.
        votes = sum(
            bool(
                predict_masked_sentence(
                    generate_prompt(parent, child, prompt_index),
                    tokenizer,
                    model,
                    device
                )
            )
            for prompt_index in prompt_indices
        )
        parents_scores[parent] += votes

    max_score = max(parents_scores.values())
    parents_max_scores = {
        parent: score
        for parent, score in parents_scores.items()
        if score == max_score
    }

    if len(parents_max_scores) == 1:
        return next(iter(parents_max_scores))

    if level < max_level:
        return predict_parent(
            child, parents_max_scores, tokenizer, model, device, level + 1, max_level
        )

    # All levels exhausted: count the random decision and pick among the ties.
    # `.get` keeps the function usable when the counter was never initialised.
    randoms = globals().get('randoms', 0) + 1
    return random.choice(list(parents_max_scores.keys()))

In [21]:
def evaluate_model(df, tokenizer, model, device, run_name):
    """Predict a parent for every row of ``df`` and report the scores.

    ``predict_parent`` is called once per row with a fresh zero score for
    each entry of the notebook-level ``parents``. The predictions are stored
    in a new ``prediction`` column of ``df`` (the frame is modified in
    place). Accuracy and weighted precision, recall and F1 against the
    ``parent`` column are printed, and the frame is written to
    ``results-<run_name>.csv`` under ``CHALLENGE_OUTPUT_PATH``.
    """
    y_true = df['parent']

    def _predict_row(row):
        fresh_scores = dict.fromkeys(parents, 0)
        return predict_parent(row['child'], fresh_scores, tokenizer, model, device)

    df['prediction'] = df.progress_apply(_predict_row, axis=1)
    y_pred = df['prediction']

    evaluation_result = {'acc': accuracy_score(y_true, y_pred)}
    weighted_metrics = (('prc', precision_score), ('rcl', recall_score), ('f1s', f1_score))
    for key, metric in weighted_metrics:
        evaluation_result[key] = metric(y_true, y_pred, average='weighted')

    print(f"Run name: {run_name}")
    print(f"Evaluation result on test dataset:")
    print(evaluation_result)
    print()

    df.to_csv(CHALLENGE_OUTPUT_PATH + f'results-{run_name}.csv', index=False)

In [22]:
def percentage(part, whole):
    """Return ``part`` as a whole-number percentage of ``whole``.

    The ratio is scaled to percent before rounding. Rounding the ratio first
    and multiplying afterwards yields float artefacts such as
    ``7.000000000000001`` or ``56.99999999999999`` in the printed reports.

    Returns:
        The percentage as a float with no fractional part (e.g. ``33.0``).

    Raises:
        ZeroDivisionError: If ``whole`` is 0.
    """
    return float(round(100 * part / whole))

In [23]:
def init_model_for_eval(model_name, temp_folder=True):
    """Load a saved checkpoint and its tokenizer for evaluation.

    Args:
        model_name: Name of the checkpoint folder.
        temp_folder: When equal to ``True`` the checkpoint is looked up under
            ``CHALLENGE_MODEL_PATH + 'temp/'``, otherwise directly under
            ``CHALLENGE_MODEL_PATH``.

    Returns:
        ``(tokenizer, model, device)``; the model has been moved to
        ``device``, which is CUDA when available and the CPU otherwise.
    """
    checkpoint_root = (
        (CHALLENGE_MODEL_PATH + 'temp/') if temp_folder == True else CHALLENGE_MODEL_PATH
    )
    model_path = checkpoint_root + model_name

    model = BertForSequenceClassification.from_pretrained(model_path)
    tokenizer = BertTokenizer.from_pretrained(model_path)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model.to(device)

    return tokenizer, model, device

## On validation dataset

In [25]:
# Keep only the positive pairs: their `parent` column is the gold answer
# that evaluate_model compares the predictions against.
is_positive = eval_df['label'] == True
test_df = eval_df[is_positive].copy()

test_df

Unnamed: 0,parent,child,label
0,"stream, lake",abandoned canal,True
1,"stream, lake",sulphur spring(s),True
2,"parks, area",region,True
3,undersea,seachannel,True
4,"parks, area",amusement park,True
...,...,...,...
71,undersea,sill,True
72,"spot, building, farm",ruined bridge,True
73,"spot, building, farm",mall,True
74,"parks, area",continent,True


### 1 to 8 prompts

1, 1-2, 1-3, 1-4, 1-5, 1-6, 1-7, 1-8

In [None]:
# Prompt 1
# Scores the checkpoint saved after template 1 of the 8-prompt run on test_df.
# The tie-break counter and the RNG seed are reset first, so the reported
# numbers do not depend on which cells ran before this one.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try1-prompt-{1}-model'
randoms = 0
random.seed(50 - 1)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1-2
# Scores the checkpoint saved after template 2 of the 8-prompt run on test_df.
# The tie-break counter and the RNG seed are reset first, so the reported
# numbers do not depend on which cells ran before this one.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try1-prompt-{2}-model'
randoms = 0
random.seed(50 - 2)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1-3
# Scores the checkpoint saved after template 3 of the 8-prompt run on test_df.
# The tie-break counter and the RNG seed are reset first, so the reported
# numbers do not depend on which cells ran before this one.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try1-prompt-{3}-model'
randoms = 0
random.seed(50 - 3)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1-4
# Scores the checkpoint saved after template 4 of the 8-prompt run on test_df.
# The tie-break counter and the RNG seed are reset first, so the reported
# numbers do not depend on which cells ran before this one.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try1-prompt-{4}-model'
randoms = 0
random.seed(50 - 4)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1-5
# Scores the checkpoint saved after template 5 of the 8-prompt run on test_df.
# The tie-break counter and the RNG seed are reset first, so the reported
# numbers do not depend on which cells ran before this one.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try1-prompt-{5}-model'
randoms = 0
random.seed(50 - 5)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1-6
# Scores a checkpoint saved after template 6 on test_df; the tie-break counter
# and the RNG seed are reset first.
# NOTE(review): this loads a `try2` checkpoint, while the training cell in this
# notebook saves its checkpoints under `try1` - confirm the `try2` run exists.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try2-prompt-{6}-model'
randoms = 0
random.seed(50 - 6)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1-7
# Scores a checkpoint saved after template 7 on test_df; the tie-break counter
# and the RNG seed are reset first.
# NOTE(review): this loads a `try2` checkpoint, while the training cell in this
# notebook saves its checkpoints under `try1` - confirm the `try2` run exists.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try2-prompt-{7}-model'
randoms = 0
random.seed(50 - 7)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1-8
# Scores a checkpoint saved after template 8 on test_df; the tie-break counter
# and the RNG seed are reset first.
# NOTE(review): this loads a `try2` checkpoint, while the training cell in this
# notebook saves its checkpoints under `try1` - confirm the `try2` run exists.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try2-prompt-{8}-model'
randoms = 0
random.seed(50 - 8)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

### 1 to 4 + 7, 8 prompts

1-4 + 7, 1-4 + 7, 8

In [None]:
# Prompts 1-4 + 7
# Scores the template-7 checkpoint of the run that resumed from template 4.
# The tie-break counter and the RNG seed are reset first, so the reported
# numbers do not depend on which cells ran before this one.
model_name = f'res-bert-ft-6epochs-6prompts-7-8-from4-400-76-try1-prompt-{7}-model'
randoms = 0
random.seed(50 - 9)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1-4 + 7, 8
# Scores the template-8 checkpoint of the run that resumed from template 4.
# The tie-break counter and the RNG seed are reset first, so the reported
# numbers do not depend on which cells ran before this one.
model_name = f'res-bert-ft-6epochs-6prompts-7-8-from4-400-76-try1-prompt-{8}-model'
randoms = 0
random.seed(50 - 10)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

### 1 to 5 prompts - only top-down (superclass statement) prompts

1, 1+3, 1+3+5, 1+3+5+7, 1+3+5+7+9

In [None]:
# Prompt 1
# Same checkpoint as the first "1 to 8 prompts" cell (template 1 of the
# 8-prompt run), evaluated again here with a different tie-break seed.
# The tie-break counter is reset before the evaluation.
model_name = f'res-bert-ft-8epochs-8prompts-400-76-try1-prompt-{1}-model'
randoms = 0
random.seed(50 - 11)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1, 3
# Scores the checkpoint saved after step 2 of the top-down run on test_df;
# the tie-break counter and the RNG seed are reset first.
# NOTE(review): the top-down training cell asks for manual edits to
# generate_prompt() and predict_parent(); confirm they are in place when this
# cell runs.
model_name = f'res-bert-ft-5epochs-5prompts-topdown-from1-400-76-try1-prompt-{2}-model'
randoms = 0
random.seed(50 - 12)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1, 3, 5
# Scores the checkpoint saved after step 3 of the top-down run on test_df;
# the tie-break counter and the RNG seed are reset first.
# NOTE(review): the top-down training cell asks for manual edits to
# generate_prompt() and predict_parent(); confirm they are in place when this
# cell runs.
model_name = f'res-bert-ft-5epochs-5prompts-topdown-from1-400-76-try1-prompt-{3}-model'
randoms = 0
# NOTE(review): every other evaluation cell seeds with `50 - k`; this one uses
# `50 + 13`. Confirm whether that is intentional before changing it, because
# the seed decides the random tie-breaks behind any reported numbers.
random.seed(50 + 13)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1, 3, 5, 7
# Scores the checkpoint saved after step 4 of the top-down run on test_df;
# the tie-break counter and the RNG seed are reset first.
# NOTE(review): the top-down training cell asks for manual edits to
# generate_prompt() and predict_parent(); confirm they are in place when this
# cell runs.
model_name = f'res-bert-ft-5epochs-5prompts-topdown-from1-400-76-try1-prompt-{4}-model'
randoms = 0
random.seed(50 - 14)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")

In [None]:
# Prompts 1, 3, 5, 7, 9
# Scores the checkpoint saved after step 5 of the top-down run on test_df;
# the tie-break counter and the RNG seed are reset first.
# NOTE(review): the top-down training cell asks for manual edits to
# generate_prompt() and predict_parent(); confirm they are in place when this
# cell runs.
model_name = f'res-bert-ft-5epochs-5prompts-topdown-from1-400-76-try1-prompt-{5}-model'
randoms = 0
random.seed(50 - 15)

evaluate_model(
    test_df,
    *init_model_for_eval(model_name),
    # Run name = checkpoint name without the 'res-' prefix and '-model' suffix.
    model_name[4:-6] + '-wncrs'
)

non_randoms = len(test_df) - randoms

print(f"Number of randomly chosen parents: {randoms} ({percentage(randoms, len(test_df))}%)")
print(f"Number of preciesly chosen parents: {non_randoms} ({percentage(non_randoms, len(test_df))}%)")