In [2]:
import boto3
import copy
import datetime
import importlib
import json
import os
import pathlib
import sys
import wandb
import evaluate

import awswrangler as wr
import numpy as np
import pandas as pd

from botocore.exceptions import ClientError
from IPython.display import display
import plotly.express as px
import plotly.io as pio
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Metric object used for the baseline Matthews correlation scores further down.
matthews_metric = evaluate.load('matthews_correlation')
# Render plotly figures through iframes in this notebook.
pio.renderers.default = 'iframe'
# NOTE(review): presumably makes a Chrome binary available to plotly for static
# image export - confirm against the installed plotly version.
pio.get_chrome()

# Make the shared helper modules importable. They live in ../01_modules during
# development and in ./01_modules in the deployed layout.
if '__file__' in globals():
    script_dir = pathlib.Path(__file__).parent.resolve()
else:
    # Notebooks do not define __file__; fall back to the current working directory.
    script_dir = pathlib.Path().absolute()
modules_path_in_dev = os.path.abspath(os.path.join(script_dir, '..', '01_modules'))
modules_path_in_prod = os.path.abspath(os.path.join(script_dir, '01_modules'))
for modules_path in (modules_path_in_dev, modules_path_in_prod):
    # Skip paths that are already registered so re-running this cell does not
    # keep growing sys.path with duplicates.
    if os.path.exists(modules_path) and modules_path not in sys.path:
        sys.path.append(modules_path)


# Jupyter only reads a local module the first time it is imported after kernel
# start; re-running the import does not pick up later edits to the file.
# Workaround: import the module, then force a refresh with importlib.reload.
import utils
_ = importlib.reload(utils)
import config
_ = importlib.reload(config)

utils.pd_set_options(cols=500)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
config.py loaded: v0.1
utils.py loaded: v0.2.12
utils.py loaded: v0.2.12
config.py loaded: v0.1


In [3]:
# Log in to Weights & Biases with the API key returned by utils.get_secret.
wandb_api_key = utils.get_secret(
    secret_name='WeightsAndBiases',
    region_name=config.AWS_REGION,
)['api_key']
wandb.login(key=wandb_api_key)

# Fixed category ordering handed to the plotly figures via `category_orders`.
plotly_category_orders = dict(
    label=['subfield', 'topic'],
    sample=['100%', '10%', '1%'],
    text=['title', 'abstract', 'fulltext'],
)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/sagemaker-user/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msteve-attila-kopias[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
# Compute two naive baselines per label type on the test table:
#   * "random"  - a uniformly drawn integer label for every test row
#   * "popular" - the first label returned by the count-ordered label query
# NOTE: the values computed here are replaced by the frozen literal assigned to
# `baseline_results` later in this cell.
np.random.seed(1)
baseline_results = {}
use_semibalanced = False
database_name = '03_core'
for label_type in ['subfield', 'topic']:
    if use_semibalanced:
        table_name = 'unified_works_semibalanced_test'
        label_table = f'{label_type}s_semibalanced'
    else:
        table_name = 'unified_works_test'
        label_table = f'{label_type}s'

    # Labels ordered by descending count, so row 0 is the most frequent label.
    label_df = wr.athena.read_sql_query(
    f"""
    SELECT 
        {label_type}_index AS index,
        {label_type}_display_name AS display_name
    FROM
        {label_table}
    ORDER BY
        {label_type}_count DESC
    """, database_name
    )

    # Reference label index of every row in the test table.
    texts_df = wr.athena.read_sql_query(f"""
        SELECT
            {label_type}_index
        FROM
            "{database_name}".{table_name}
        """,
        database_name
    )
    test_reference_labels = texts_df[f'{label_type}_index'].tolist()
    n_test_rows = texts_df.shape[0]

    # NOTE(review): drawing from [0, number_of_labels) assumes label indices are
    # contiguous and zero-based - confirm against the label tables.
    random_labels = np.random.randint(0, label_df.shape[0], n_test_rows)
    # Look the most popular label up once instead of once per test row.
    most_popular_labels = [label_df.iloc[0, 0]] * n_test_rows

    matthews_random = matthews_metric.compute(references=test_reference_labels, predictions=random_labels)
    matthews_popular = matthews_metric.compute(references=test_reference_labels, predictions=most_popular_labels)
    accuracy_random = accuracy_score(test_reference_labels, random_labels)
    accuracy_popular = accuracy_score(test_reference_labels, most_popular_labels)
    # Only the F-score (third element) of the micro-averaged result is kept.
    _, _, f1_random, _ = precision_recall_fscore_support(test_reference_labels, random_labels, average='micro')
    _, _, f1_popular, _ = precision_recall_fscore_support(test_reference_labels, most_popular_labels, average='micro')
    baseline_results[label_type] = {
        'random': {
            'matthews': matthews_random['matthews_correlation'],
            'accuracy': accuracy_random,
            'f1': f1_random,
        },
        'popular': {
            'matthews': matthews_popular['matthews_correlation'],
            'accuracy': accuracy_popular,
            'f1': f1_popular,
        }
    }

# Pin the baseline numbers from an earlier run so that reruns of the notebook
# always compare against the same reference values.
baseline_results = {
    'subfield': {
        'random': dict(
            matthews=-0.0016264598985861842,
            accuracy=0.08914063100699696,
            f1=0.08914063100699696,
        ),
        'popular': dict(
            matthews=0.0,
            accuracy=0.38175667016941334,
            f1=0.38175667016941334,
        ),
    },
    'topic': {
        'random': dict(
            matthews=-0.0001750655412931846,
            accuracy=0.0031012122920778123,
            f1=0.0031012122920778123,
        ),
        'popular': dict(
            matthews=0.0,
            accuracy=0.04756900838096215,
            f1=0.047569008380962154,
        ),
    },
}
print(json.dumps(baseline_results, indent=4, default=str))

{
    "subfield": {
        "random": {
            "matthews": -0.0016264598985861842,
            "accuracy": 0.08914063100699696,
            "f1": 0.08914063100699696
        },
        "popular": {
            "matthews": 0.0,
            "accuracy": 0.38175667016941334,
            "f1": 0.38175667016941334
        }
    },
    "topic": {
        "random": {
            "matthews": -0.0001750655412931846,
            "accuracy": 0.0031012122920778123,
            "f1": 0.0031012122920778123
        },
        "popular": {
            "matthews": 0.0,
            "accuracy": 0.04756900838096215,
            "f1": 0.047569008380962154
        }
    }
}


In [3]:
class WandbRuns:
    """Fetch Weights & Biases runs and flatten their evaluation history.

    On construction the runs matching `filters` are requested from the W&B API
    and their histories are combined into one DataFrame, `runs_history`, with
    one row per whole-number epoch that has an `eval/loss` value.

    Args:
        entity: W&B entity owning the project.
        project: W&B project name.
        and_tags: optional list of tags; when given, the filter requires all of
            them (`{'$all': and_tags}`).
        filters: optional filter dict passed to `wandb.Api().runs`. It is copied,
            so the caller's dict is never modified. `state` defaults to
            `'finished'` when not supplied.

    Attributes:
        runs: the runs collection returned by the API.
        runs_history: combined history DataFrame, or None when no run produced rows.
        runs_details: one (currently empty) dict per run.
    """

    # History columns kept for the analysis, in output order.
    NEEDED_KEYS = [
        '_step',
        '_runtime',
        '_timestamp',

        'train/epoch',
        'train/global_step',

        'train/loss',
        'eval/loss',

        'eval/f1',
        'eval/accuracy',
        'eval/matthews_correlation',
    ]
    # Columns the per-run processing below cannot work without.
    REQUIRED_KEYS = (
        '_step', '_runtime', '_timestamp',
        'train/epoch', 'train/loss', 'eval/loss', 'eval/f1',
    )

    def __init__(self, entity=config.WANDB_ENTITY, project=config.WANDB_PROJECT, and_tags=None, filters=None):
        self.entity = entity
        self.project = project
        self.and_tags = and_tags
        # Work on a private copy: a shared `{}` default (or the caller's dict)
        # would otherwise accumulate the keys added below across instances.
        self.filters = dict(filters) if filters else {}
        self.filters.setdefault('state', 'finished')
        # To test:
        # self.filters['config.job_name'] = 'scibert-subfield-fulltext-s100-0917214139'

        if self.and_tags is not None:
            self.filters['tags'] = {'$all': self.and_tags}

        self.root = f'{self.entity}/{self.project}'
        self.runs = None
        self.runs_history = None
        self.runs_details = []

        self.api = wandb.Api()

        self.get_runs()
        self.get_runs_history()

    def get_runs(self):
        """Request the runs matching `self.filters` and store them in `self.runs`."""
        self.runs = self.api.runs(
            self.root,
            filters=self.filters,
        )

    def get_runs_history(self):
        """Rebuild `self.runs_history` and `self.runs_details` from `self.runs`.

        For each run the history is sorted, `train/loss` is forward-filled, and
        only rows at whole-number epochs with an `eval/loss` value are kept. Tags
        of the form `key: value` become constant columns (the `instance` tag is
        ignored), and the row with the highest `eval/f1` is flagged in
        `is_best_epoch`. Runs lacking a required column or having no remaining
        rows are skipped with a printed notice.
        """
        history_frames = []
        runs_details = []
        for run in self.runs:
            # Placeholder entry per run. Candidate fields: run.name, run.id,
            # run.url, run.state, run.tags, run.config, run.metadata,
            # run.created_at.
            runs_details.append({})

            history_df_full = run.history(
                pandas=True,
                samples=500,
                x_axis='_epoch',
            )
            needed_cols = [key for key in self.NEEDED_KEYS if key in history_df_full.columns]
            missing_cols = [key for key in self.REQUIRED_KEYS if key not in needed_cols]
            if missing_cols:
                print(f'Skipping run {run.id}: history lacks columns {missing_cols}')
                continue

            # .copy() keeps the assignments below off a view of the full history.
            history_df = (
                history_df_full[needed_cols]
                .sort_values(by=['_step', '_runtime', '_timestamp'])
                .copy()
            )
            # Forward-fill so each evaluation row carries the latest training
            # loss logged before it.
            history_df['train/loss'] = history_df['train/loss'].ffill()
            # Keep only rows logged at whole-number epochs.
            history_df = history_df[
                history_df['train/epoch'] == np.round(history_df['train/epoch']).astype('Int64')
            ].copy()
            history_df['train/epoch'] = history_df['train/epoch'].astype('Int64')
            history_df = history_df[history_df['eval/loss'].notna()].copy()
            if history_df.empty:
                print(f'Skipping run {run.id}: no evaluation rows at whole epochs')
                continue

            history_df['run_id'] = run.id
            tag_keys = []
            for tag in run.tags:
                # Split on the first separator only and ignore tags that are not
                # `key: value` pairs instead of failing on the tuple unpacking.
                tag_key, separator, tag_value = tag.partition(': ')
                if not separator or tag_key == 'instance':
                    continue
                history_df[tag_key] = tag_value
                tag_keys.append(tag_key)

            history_df = history_df[['run_id'] + tag_keys + needed_cols].copy()

            history_df['is_best_epoch'] = False
            # idxmax is only meaningful when at least one F1 value is present.
            if history_df['eval/f1'].notna().any():
                history_df.loc[history_df['eval/f1'].idxmax(), 'is_best_epoch'] = True

            history_frames.append(history_df)

        # Concatenate once: avoids quadratic re-copying and makes repeated calls
        # rebuild the frame instead of appending duplicate rows.
        if history_frames:
            self.runs_history = pd.concat(history_frames, ignore_index=True)
        else:
            self.runs_history = None
        self.runs_details = runs_details

# Download the run histories and keep a timestamped pickle snapshot of them.
wandb_log = WandbRuns()
wandb_runs_history = wandb_log.runs_history
wandb_runs_history.to_pickle(f'./wandb_runs_history_{datetime.datetime.now().strftime("%Y%m%d%H%M%S")}.pkl')

In [8]:
# Continue from a saved snapshot of the run histories.
# Pickle files execute code on load, so only read snapshots written by this notebook.
wandb_runs_history = pd.read_pickle('./wandb_runs_history_20250920212130.pkl')
wandb_runs_history.head(n=3)

Unnamed: 0,run_id,label,model,sample,text,_step,_runtime,_timestamp,train/epoch,train/global_step,train/loss,eval/loss,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
0,bert-subfield-title-s1-0916224638-114ogh-algo-1,subfield,bert,1%,title,0,35.189463,1758063000.0,1,195,,1.34502,0.558974,0.558974,0.436695,False
1,bert-subfield-title-s1-0916224638-114ogh-algo-1,subfield,bert,1%,title,1,54.88482,1758063000.0,2,390,,1.42066,0.564103,0.564103,0.447146,False
2,bert-subfield-title-s1-0916224638-114ogh-algo-1,subfield,bert,1%,title,3,76.12758,1758063000.0,3,585,1.2704,1.35982,0.6,0.6,0.493372,False


In [5]:
# Best epoch of every topic run trained on the full sample with abstract or
# fulltext input, ranked by F1.
best_topic_rows = (
    wandb_runs_history['is_best_epoch'].eq(True)
    & wandb_runs_history['text'].ne('title')
    & wandb_runs_history['label'].eq('topic')
    & wandb_runs_history['sample'].eq('100%')
)
wandb_runs_history[best_topic_rows].sort_values(by='eval/f1', ascending=False)

Unnamed: 0,run_id,label,model,sample,text,_step,_runtime,_timestamp,train/epoch,train/global_step,train/loss,eval/loss,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
442,scibert-topic-abstract-s100-0917214440-y0us2l-...,topic,scibert,100%,abstract,31,13585.782862,1758159000.0,3,14568,0.7947,1.205999,0.706871,0.706871,0.703856,True
694,longformer-topic-abstract-s100-0918102051-o64s...,topic,longformer,100%,abstract,52,80698.401466,1758279000.0,5,24280,0.6323,1.231289,0.704437,0.704437,0.701366,True
419,roberta-topic-abstract-s100-0917145422-wwrsxz-...,topic,roberta,100%,abstract,28,12647.500345,1758154000.0,5,12140,0.7266,1.228225,0.703873,0.703873,0.700799,True
526,ModernBERT-topic-abstract-s100-0918004324-7q71...,topic,ModernBERT,100%,abstract,20,15729.939139,1758172000.0,2,9712,0.915,1.161514,0.703206,0.703206,0.70006,True
608,bigbird-topic-abstract-s100-0918020047-zn8l1y-...,topic,bigbird,100%,abstract,22,20557.221457,1758184000.0,4,9712,0.8382,1.223253,0.702745,0.702745,0.69964,True
724,deberta-topic-abstract-s100-0918145257-mccxcr-...,topic,deberta,100%,abstract,28,22263.979882,1758245000.0,5,12140,0.8016,1.276827,0.700823,0.700823,0.697723,True
77,bert-topic-abstract-s100-0917032639-7bunqv-algo-1,topic,bert,100%,abstract,31,13538.188686,1758093000.0,3,14568,0.8994,1.234328,0.700515,0.700515,0.697397,True
302,distilbert-topic-abstract-s100-0917115955-sp0l...,topic,distilbert,100%,abstract,31,7255.338266,1758137000.0,3,14568,0.9168,1.219088,0.696594,0.696594,0.693465,True
432,scibert-topic-fulltext-s100-0917213838-zy7a39-...,topic,scibert,100%,fulltext,31,14376.016369,1758160000.0,3,14568,0.9994,1.37829,0.671374,0.671374,0.667955,True
701,ModernBERT-topic-fulltext-s100-0918144354-4c5g...,topic,ModernBERT,100%,fulltext,10,9236.907684,1758217000.0,2,4856,1.1846,1.34499,0.66858,0.66858,0.665092,True


In [9]:
# Build DataFrames holding the "most popular label" baseline in the same column
# layout as the W&B history, so they can be concatenated with model results.
# Each dict lists 'label' once; the previous literal repeated the key and the
# first value ('BASELINE') was silently discarded.
baseline_single_epoch = [
    {
        'run_id': 'BASELINE',
        'model': 'BASELINE',
        'label': label_type,
        'text': 'BASELINE',
        'sample': 'BASELINE',
        'eval/f1': baseline_results[label_type]['popular']['f1'],
        'eval/accuracy': baseline_results[label_type]['popular']['accuracy'],
        'eval/matthews_correlation': baseline_results[label_type]['popular']['matthews'],
    }
    for label_type in ('subfield', 'topic')
]
# Repeat both records for epochs 1..5 so the baseline draws as a flat line.
# Row order is epoch-major: (1, subfield), (1, topic), (2, subfield), ...
baseline_five_epochs = [
    {**record, 'train/epoch': epoch}
    for epoch in range(1, 6)
    for record in baseline_single_epoch
]

baseline_single_epoch_df = pd.DataFrame(baseline_single_epoch)
baseline_five_epochs_df = pd.DataFrame(baseline_five_epochs)
baseline_five_epochs_topic_df = baseline_five_epochs_df[baseline_five_epochs_df['label']=='topic']
baseline_five_epochs_subfield_df = baseline_five_epochs_df[baseline_five_epochs_df['label']=='subfield']

baseline_single_epoch_df.to_pickle(f'./baseline_single_epoch_df.pkl')
baseline_five_epochs_df.to_pickle(f'./baseline_five_epochs_df.pkl')
baseline_five_epochs_topic_df.to_pickle(f'./baseline_five_epochs_topic_df.pkl')
baseline_five_epochs_subfield_df.to_pickle(f'./baseline_five_epochs_subfield_df.pkl')
baseline_five_epochs_subfield_df.head(3)

Unnamed: 0,run_id,model,label,text,sample,eval/f1,eval/accuracy,eval/matthews_correlation,train/epoch
0,BASELINE,BASELINE,subfield,BASELINE,BASELINE,0.381757,0.381757,0.0,1
2,BASELINE,BASELINE,subfield,BASELINE,BASELINE,0.381757,0.381757,0.0,2
4,BASELINE,BASELINE,subfield,BASELINE,BASELINE,0.381757,0.381757,0.0,3


In [15]:
# BERT, title -> topic, 1% sample: evaluation metrics per epoch in logging order.
bert_title_topic_s1_rows = (
    wandb_runs_history['model'].eq('bert')
    & wandb_runs_history['text'].eq('title')
    & wandb_runs_history['label'].eq('topic')
    & wandb_runs_history['sample'].eq('1%')
)
df_bert_title_topic_s1 = wandb_runs_history[bert_title_topic_s1_rows].sort_values(by='_step', ascending=True)
df_bert_title_topic_s1 = df_bert_title_topic_s1[[
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]]
df_bert_title_topic_s1

Unnamed: 0,label,model,sample,text,train/epoch,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
5,topic,bert,1%,title,1,0.071795,0.071795,0.067861,False
6,topic,bert,1%,title,2,0.1,0.1,0.093048,False
7,topic,bert,1%,title,3,0.151282,0.151282,0.146552,False
8,topic,bert,1%,title,4,0.164103,0.164103,0.159136,False
9,topic,bert,1%,title,5,0.169231,0.169231,0.164462,True


In [16]:
# F1 and Matthews correlation per epoch for BERT title -> topic (1% sample),
# drawn together with the topic baseline rows.
fig = px.line(
    data_frame=pd.concat([df_bert_title_topic_s1, baseline_five_epochs_topic_df]),
    title='BERT Title->Topic 1%',
    subtitle=None,
    x='train/epoch',
    y=['eval/f1', 'eval/matthews_correlation'],
    line_group='model',
    line_dash='model',
    color=None,
    symbol=None,
    markers=True,
    range_x=None,
    range_y=[-0.05, 0.9],
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='Metrics')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [17]:
# BERT, title -> subfield, 1% sample: evaluation metrics per epoch in logging order.
bert_title_subfield_s1_rows = (
    wandb_runs_history['model'].eq('bert')
    & wandb_runs_history['text'].eq('title')
    & wandb_runs_history['label'].eq('subfield')
    & wandb_runs_history['sample'].eq('1%')
)
df = wandb_runs_history[bert_title_subfield_s1_rows].sort_values(by='_step', ascending=True)
df = df[[
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]]
df_bert_title_subfield_s1 = df
df_bert_title_subfield_s1

Unnamed: 0,label,model,sample,text,train/epoch,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
0,subfield,bert,1%,title,1,0.558974,0.558974,0.436695,False
1,subfield,bert,1%,title,2,0.564103,0.564103,0.447146,False
2,subfield,bert,1%,title,3,0.6,0.6,0.493372,False
3,subfield,bert,1%,title,4,0.6,0.6,0.502466,False
4,subfield,bert,1%,title,5,0.610256,0.610256,0.515223,True


In [18]:
# F1 and Matthews correlation per epoch for BERT title -> subfield (1% sample),
# drawn together with the subfield baseline rows.
fig = px.line(
    data_frame=pd.concat([df_bert_title_subfield_s1, baseline_five_epochs_subfield_df]),
    title='BERT Title->Subfield 1%',
    subtitle=None,
    x='train/epoch',
    y=['eval/f1', 'eval/matthews_correlation'],
    line_group='model',
    line_dash='model',
    color=None,
    symbol=None,
    markers=True,
    range_x=None,
    range_y=[-0.05, 0.9],
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='Metrics')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [19]:
# Every BERT run trained on the 1% sample, regardless of text input or label type.
bert_s1_rows = (
    wandb_runs_history['model'].eq('bert')
    & wandb_runs_history['sample'].eq('1%')
)
df = wandb_runs_history[bert_s1_rows].sort_values(by='_step', ascending=True)
df = df[[
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]]
df_bert_s1 = df
df_bert_s1

Unnamed: 0,label,model,sample,text,train/epoch,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
0,subfield,bert,1%,title,1,0.558974,0.558974,0.436695,False
5,topic,bert,1%,title,1,0.071795,0.071795,0.067861,False
15,topic,bert,1%,abstract,1,0.076923,0.076923,0.075142,False
10,subfield,bert,1%,abstract,1,0.576923,0.576923,0.465857,False
25,topic,bert,1%,fulltext,1,0.102564,0.102564,0.100528,False
20,subfield,bert,1%,fulltext,1,0.607692,0.607692,0.501196,False
6,topic,bert,1%,title,2,0.1,0.1,0.093048,False
11,subfield,bert,1%,abstract,2,0.648718,0.648718,0.557235,True
16,topic,bert,1%,abstract,2,0.164103,0.164103,0.160134,False
21,subfield,bert,1%,fulltext,2,0.674359,0.674359,0.594514,False


In [20]:
# Combined "text->label" key per row; the figure below still splits on the two
# underlying columns (colour = text input, dash = label type).
df_bert_s1['text_label'] = df_bert_s1['text'] + '->' + df_bert_s1['label']

fig = px.line(
    data_frame=df_bert_s1,
    title='BERT 1% models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    color='text',
    line_dash='label',
    symbol=None,
    markers=True,
    range_x=None,
    range_y=[-0.05, 0.9],
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [21]:
# Every BERT run predicting subfields, across all sample sizes and text inputs.
bert_subfield_rows = (
    wandb_runs_history['model'].eq('bert')
    & wandb_runs_history['label'].eq('subfield')
)
df = wandb_runs_history[bert_subfield_rows].sort_values(by='_step', ascending=True)
df = df[[
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]]
df_bert_subfield = df
df_bert_subfield

Unnamed: 0,label,model,sample,text,train/epoch,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
0,subfield,bert,1%,title,1,0.558974,0.558974,0.436695,False
10,subfield,bert,1%,abstract,1,0.576923,0.576923,0.465857,False
20,subfield,bert,1%,fulltext,1,0.607692,0.607692,0.501196,False
50,subfield,bert,10%,fulltext,1,0.756791,0.756791,0.693195,False
21,subfield,bert,1%,fulltext,2,0.674359,0.674359,0.594514,False
1,subfield,bert,1%,title,2,0.564103,0.564103,0.447146,False
11,subfield,bert,1%,abstract,2,0.648718,0.648718,0.557235,True
51,subfield,bert,10%,fulltext,2,0.770118,0.770118,0.709671,False
30,subfield,bert,10%,title,1,0.710917,0.710917,0.64516,False
22,subfield,bert,1%,fulltext,3,0.687179,0.687179,0.607154,False


In [22]:
# F1 per epoch for every BERT subfield model, against the subfield baseline.
# The copy-pasted `df_bert_s1['text_label'] = ...` assignment was dropped: this
# cell never reads df_bert_s1, so the line only added a hidden dependency on an
# earlier cell.

# Relabel the baseline rows so they appear as their own 'BASELINE' category.
baseline_five_epochs_subfield_df_ = baseline_five_epochs_subfield_df.assign(
    sample='BASELINE',
    text='BASELINE',
    label='BASELINE',
)

fig = px.line(
    data_frame=pd.concat([df_bert_subfield, baseline_five_epochs_subfield_df_]),
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[0.33, 0.9],
    title='BERT Subfield models',
    subtitle=None,
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [23]:
# Every BERT run predicting topics, across all sample sizes and text inputs.
bert_topic_rows = (
    wandb_runs_history['model'].eq('bert')
    & wandb_runs_history['label'].eq('topic')
)
df = wandb_runs_history[bert_topic_rows].sort_values(by='_step', ascending=True)
df = df[[
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]]
df_bert_topic = df
df_bert_topic

Unnamed: 0,label,model,sample,text,train/epoch,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
5,topic,bert,1%,title,1,0.071795,0.071795,0.067861,False
15,topic,bert,1%,abstract,1,0.076923,0.076923,0.075142,False
25,topic,bert,1%,fulltext,1,0.102564,0.102564,0.100528,False
55,topic,bert,10%,fulltext,1,0.38775,0.38775,0.380952,False
26,topic,bert,1%,fulltext,2,0.176923,0.176923,0.17303,False
6,topic,bert,1%,title,2,0.1,0.1,0.093048,False
16,topic,bert,1%,abstract,2,0.164103,0.164103,0.160134,False
56,topic,bert,10%,fulltext,2,0.492568,0.492568,0.486591,False
35,topic,bert,10%,title,1,0.36366,0.36366,0.356768,False
27,topic,bert,1%,fulltext,3,0.230769,0.230769,0.2257,False


In [24]:
# F1 per epoch for every BERT topic model, against the topic baseline.
# The copy-pasted `df_bert_s1['text_label'] = ...` assignment was dropped: this
# cell never reads df_bert_s1, so the line only added a hidden dependency on an
# earlier cell.
fig = px.line(
    data_frame=pd.concat([df_bert_topic, baseline_five_epochs_topic_df]),
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[0., 0.9],
    title='BERT Topic models',
    subtitle=None,
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [107]:
# Two highest-F1 best-epoch rows per label type (subfield first, then topic).
top_two_per_label = [
    frame[frame['is_best_epoch'] == True].sort_values(by='eval/f1', ascending=False).head(2)
    for frame in (df_bert_subfield, df_bert_topic)
]
df_bert_bestof = pd.concat(top_two_per_label)
df_bert_bestof

Unnamed: 0,label,model,sample,text,train/epoch,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
72,subfield,bert,100%,abstract,3,0.825076,0.825076,0.777063,True
81,subfield,bert,100%,fulltext,2,0.809545,0.809545,0.757211,True
77,topic,bert,100%,abstract,3,0.700515,0.700515,0.697397,True
87,topic,bert,100%,fulltext,3,0.659405,0.659405,0.655866,True


In [25]:
# DistilBERT on subfield labels: eval F1 per epoch for each sample size and text
# input, drawn together with the matching baseline rows.
model = 'DistilBERT'
label = 'Subfield'
is_subfield = label == 'Subfield'
baseline_df = baseline_five_epochs_subfield_df if is_subfield else baseline_five_epochs_topic_df
range_min = 0.33 if is_subfield else 0.0

plot_columns = [
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]
matches_model = wandb_runs_history['model'].eq(model.lower())
matches_label = wandb_runs_history['label'].eq(label.lower())
df = wandb_runs_history[matches_model & matches_label].sort_values(by='_step', ascending=True)
df = df[plot_columns]

fig = px.line(
    data_frame=pd.concat([df, baseline_df]),
    title=f'{model} {label} models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[range_min, 0.9],
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [26]:
# DistilBERT on topic labels: eval F1 per epoch for each sample size and text
# input, drawn together with the matching baseline rows.
model = 'DistilBERT'
label = 'Topic'
is_subfield = label == 'Subfield'
baseline_df = baseline_five_epochs_subfield_df if is_subfield else baseline_five_epochs_topic_df
range_min = 0.33 if is_subfield else 0.0

plot_columns = [
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]
matches_model = wandb_runs_history['model'].eq(model.lower())
matches_label = wandb_runs_history['label'].eq(label.lower())
df = wandb_runs_history[matches_model & matches_label].sort_values(by='_step', ascending=True)
df = df[plot_columns]

fig = px.line(
    data_frame=pd.concat([df, baseline_df]),
    title=f'{model} {label} models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[range_min, 0.9],
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [27]:
# DeBERTa on subfield labels: eval F1 per epoch for each sample size and text
# input, drawn together with the matching baseline rows.
model = 'DeBERTa'
label = 'Subfield'
is_subfield = label == 'Subfield'
baseline_df = baseline_five_epochs_subfield_df if is_subfield else baseline_five_epochs_topic_df
range_min = 0.33 if is_subfield else 0.0

plot_columns = [
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]
matches_model = wandb_runs_history['model'].eq(model.lower())
matches_label = wandb_runs_history['label'].eq(label.lower())
df = wandb_runs_history[matches_model & matches_label].sort_values(by='_step', ascending=True)
df = df[plot_columns]

fig = px.line(
    data_frame=pd.concat([df, baseline_df]),
    title=f'{model} {label} models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[range_min, 0.9],
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [28]:
# DeBERTa on topic labels: eval F1 per epoch for each sample size and text
# input, drawn together with the matching baseline rows.
model = 'DeBERTa'
label = 'Topic'
is_subfield = label == 'Subfield'
baseline_df = baseline_five_epochs_subfield_df if is_subfield else baseline_five_epochs_topic_df
range_min = 0.33 if is_subfield else 0.0

plot_columns = [
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]
matches_model = wandb_runs_history['model'].eq(model.lower())
matches_label = wandb_runs_history['label'].eq(label.lower())
df = wandb_runs_history[matches_model & matches_label].sort_values(by='_step', ascending=True)
df = df[plot_columns]

fig = px.line(
    data_frame=pd.concat([df, baseline_df]),
    title=f'{model} {label} models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[range_min, 0.9],
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [29]:
# RoBERTa on subfield labels: eval F1 per epoch for each sample size and text
# input, drawn together with the matching baseline rows.
model = 'RoBERTa'
label = 'Subfield'
is_subfield = label == 'Subfield'
baseline_df = baseline_five_epochs_subfield_df if is_subfield else baseline_five_epochs_topic_df
range_min = 0.33 if is_subfield else 0.0

plot_columns = [
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]
matches_model = wandb_runs_history['model'].eq(model.lower())
matches_label = wandb_runs_history['label'].eq(label.lower())
df = wandb_runs_history[matches_model & matches_label].sort_values(by='_step', ascending=True)
df = df[plot_columns]

fig = px.line(
    data_frame=pd.concat([df, baseline_df]),
    title=f'{model} {label} models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[range_min, 0.9],
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [30]:
# RoBERTa on topic labels: eval F1 per epoch for each sample size and text
# input, drawn together with the matching baseline rows.
model = 'RoBERTa'
label = 'Topic'
is_subfield = label == 'Subfield'
baseline_df = baseline_five_epochs_subfield_df if is_subfield else baseline_five_epochs_topic_df
range_min = 0.33 if is_subfield else 0.0

plot_columns = [
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]
matches_model = wandb_runs_history['model'].eq(model.lower())
matches_label = wandb_runs_history['label'].eq(label.lower())
df = wandb_runs_history[matches_model & matches_label].sort_values(by='_step', ascending=True)
df = df[plot_columns]

fig = px.line(
    data_frame=pd.concat([df, baseline_df]),
    title=f'{model} {label} models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[range_min, 0.9],
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [32]:
# ModernBERT on subfield labels: eval F1 per epoch for each sample size and text
# input, drawn together with the matching baseline rows.
# The model name is matched as written (no lower-casing) in this cell.
model = 'ModernBERT'
label = 'Subfield'
is_subfield = label == 'Subfield'
baseline_df = baseline_five_epochs_subfield_df if is_subfield else baseline_five_epochs_topic_df
range_min = 0.33 if is_subfield else 0.0

plot_columns = [
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]
matches_model = wandb_runs_history['model'].eq(model)
matches_label = wandb_runs_history['label'].eq(label.lower())
df = wandb_runs_history[matches_model & matches_label].sort_values(by='_step', ascending=True)
# One specific run is left out of the figure.
# NOTE(review): the reason is not recorded in this notebook - confirm and document.
df = df[df['run_id'].ne('ModernBERT-subfield-title-s1-0917175205-x06d3s-algo-1')]
df = df[plot_columns]

fig = px.line(
    data_frame=pd.concat([df, baseline_df]),
    title=f'{model} {label} models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[range_min, 0.9],
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [33]:
# ModernBERT on topic labels: eval F1 per epoch for each sample size and text
# input, drawn together with the matching baseline rows.
# The model name is matched as written (no lower-casing) in this cell.
model = 'ModernBERT'
label = 'Topic'
is_subfield = label == 'Subfield'
baseline_df = baseline_five_epochs_subfield_df if is_subfield else baseline_five_epochs_topic_df
range_min = 0.33 if is_subfield else 0.0

plot_columns = [
    'label', 'model', 'sample', 'text', 'train/epoch',
    'eval/f1', 'eval/accuracy', 'eval/matthews_correlation', 'is_best_epoch',
]
matches_model = wandb_runs_history['model'].eq(model)
matches_label = wandb_runs_history['label'].eq(label.lower())
df = wandb_runs_history[matches_model & matches_label].sort_values(by='_step', ascending=True)
# NOTE(review): the excluded run id names a subfield run, so this filter
# presumably removes nothing from the topic rows - confirm whether it is needed.
df = df[df['run_id'].ne('ModernBERT-subfield-title-s1-0917175205-x06d3s-algo-1')]
df = df[plot_columns]

fig = px.line(
    data_frame=pd.concat([df, baseline_df]),
    title=f'{model} {label} models',
    subtitle=None,
    x='train/epoch',
    y='eval/f1',
    line_group='model',
    color='sample',
    line_dash='text',
    markers=True,
    range_x=None,
    range_y=[range_min, 0.9],
    category_orders=plotly_category_orders,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
fig.update_xaxes(tickmode='array', tickvals=list(range(1, 6)))
fig.show()

In [34]:
# Plot eval/f1 against train/epoch for the SciBERT subfield runs, drawn
# together with the five-epoch baseline frame for the same label.
model = 'SciBERT'
label = 'Subfield'

# Baseline frame and lower y-axis bound depend on the label being plotted.
baseline_df, range_min = (
    (baseline_five_epochs_subfield_df, 0.33)
    if label == 'Subfield'
    else (baseline_five_epochs_topic_df, 0.0)
)

# Rows for this model/label (both compared in lower case) sorted by `_step`,
# reduced to the columns of interest.
df = (
    wandb_runs_history
    .loc[lambda runs: (runs['model'] == model.lower()) & (runs['label'] == label.lower())]
    .sort_values('_step')
    .loc[
        :,
        [
            'label', 'model', 'sample', 'text', 'train/epoch',
            'eval/f1', 'eval/accuracy', 'eval/matthews_correlation',
            'is_best_epoch',
        ],
    ]
)

# Colour follows the `sample` column, dash pattern follows the `text` column.
fig = px.line(
    pd.concat([df, baseline_df]),
    x='train/epoch',
    y='eval/f1',
    color='sample',
    line_dash='text',
    line_group='model',
    markers=True,
    category_orders=plotly_category_orders,
    range_x=None,
    range_y=[range_min, 0.9],
    title=f'{model} {label} models',
    subtitle=None,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
# One tick per epoch, 1 through 5.
fig.update_xaxes(tickmode='array', tickvals=[1, 2, 3, 4, 5])
fig.show()

In [35]:
# Plot eval/f1 against train/epoch for the SciBERT topic runs, drawn
# together with the five-epoch baseline frame for the same label.
model = 'SciBERT'
label = 'Topic'

# Baseline frame and lower y-axis bound depend on the label being plotted.
baseline_df, range_min = (
    (baseline_five_epochs_subfield_df, 0.33)
    if label == 'Subfield'
    else (baseline_five_epochs_topic_df, 0.0)
)

# Rows for this model/label (both compared in lower case) sorted by `_step`,
# reduced to the columns of interest.
df = (
    wandb_runs_history
    .loc[lambda runs: (runs['model'] == model.lower()) & (runs['label'] == label.lower())]
    .sort_values('_step')
    .loc[
        :,
        [
            'label', 'model', 'sample', 'text', 'train/epoch',
            'eval/f1', 'eval/accuracy', 'eval/matthews_correlation',
            'is_best_epoch',
        ],
    ]
)

# Colour follows the `sample` column, dash pattern follows the `text` column.
fig = px.line(
    pd.concat([df, baseline_df]),
    x='train/epoch',
    y='eval/f1',
    color='sample',
    line_dash='text',
    line_group='model',
    markers=True,
    category_orders=plotly_category_orders,
    range_x=None,
    range_y=[range_min, 0.9],
    title=f'{model} {label} models',
    subtitle=None,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
# One tick per epoch, 1 through 5.
fig.update_xaxes(tickmode='array', tickvals=[1, 2, 3, 4, 5])
fig.show()

In [36]:
# Plot eval/f1 against train/epoch for the Longformer subfield runs, drawn
# together with the five-epoch baseline frame for the same label.
model = 'Longformer'
label = 'Subfield'

# Baseline frame and lower y-axis bound depend on the label being plotted.
baseline_df, range_min = (
    (baseline_five_epochs_subfield_df, 0.33)
    if label == 'Subfield'
    else (baseline_five_epochs_topic_df, 0.0)
)

# Rows for this model/label (both compared in lower case) sorted by `_step`,
# reduced to the columns of interest.
df = (
    wandb_runs_history
    .loc[lambda runs: (runs['model'] == model.lower()) & (runs['label'] == label.lower())]
    .sort_values('_step')
    .loc[
        :,
        [
            'label', 'model', 'sample', 'text', 'train/epoch',
            'eval/f1', 'eval/accuracy', 'eval/matthews_correlation',
            'is_best_epoch',
        ],
    ]
)

# Colour follows the `sample` column, dash pattern follows the `text` column.
fig = px.line(
    pd.concat([df, baseline_df]),
    x='train/epoch',
    y='eval/f1',
    color='sample',
    line_dash='text',
    line_group='model',
    markers=True,
    category_orders=plotly_category_orders,
    range_x=None,
    range_y=[range_min, 0.9],
    title=f'{model} {label} models',
    subtitle=None,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
# One tick per epoch, 1 through 5.
fig.update_xaxes(tickmode='array', tickvals=[1, 2, 3, 4, 5])
fig.show()

In [37]:
# Plot eval/f1 against train/epoch for the Longformer topic runs, drawn
# together with the five-epoch baseline frame for the same label.
model = 'Longformer'
label = 'Topic'

# Baseline frame and lower y-axis bound depend on the label being plotted.
baseline_df, range_min = (
    (baseline_five_epochs_subfield_df, 0.33)
    if label == 'Subfield'
    else (baseline_five_epochs_topic_df, 0.0)
)

# Rows for this model/label (both compared in lower case) sorted by `_step`,
# reduced to the columns of interest.
df = (
    wandb_runs_history
    .loc[lambda runs: (runs['model'] == model.lower()) & (runs['label'] == label.lower())]
    .sort_values('_step')
    .loc[
        :,
        [
            'label', 'model', 'sample', 'text', 'train/epoch',
            'eval/f1', 'eval/accuracy', 'eval/matthews_correlation',
            'is_best_epoch',
        ],
    ]
)

# Colour follows the `sample` column, dash pattern follows the `text` column.
fig = px.line(
    pd.concat([df, baseline_df]),
    x='train/epoch',
    y='eval/f1',
    color='sample',
    line_dash='text',
    line_group='model',
    markers=True,
    category_orders=plotly_category_orders,
    range_x=None,
    range_y=[range_min, 0.9],
    title=f'{model} {label} models',
    subtitle=None,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
# One tick per epoch, 1 through 5.
fig.update_xaxes(tickmode='array', tickvals=[1, 2, 3, 4, 5])
fig.show()

In [38]:
# Plot eval/f1 against train/epoch for the BigBird subfield runs, drawn
# together with the five-epoch baseline frame for the same label.
model = 'BigBird'
label = 'Subfield'

# Baseline frame and lower y-axis bound depend on the label being plotted.
baseline_df, range_min = (
    (baseline_five_epochs_subfield_df, 0.33)
    if label == 'Subfield'
    else (baseline_five_epochs_topic_df, 0.0)
)

# Rows for this model/label (both compared in lower case) sorted by `_step`,
# reduced to the columns of interest.
df = (
    wandb_runs_history
    .loc[lambda runs: (runs['model'] == model.lower()) & (runs['label'] == label.lower())]
    .sort_values('_step')
    .loc[
        :,
        [
            'label', 'model', 'sample', 'text', 'train/epoch',
            'eval/f1', 'eval/accuracy', 'eval/matthews_correlation',
            'is_best_epoch',
        ],
    ]
)

# Colour follows the `sample` column, dash pattern follows the `text` column.
fig = px.line(
    pd.concat([df, baseline_df]),
    x='train/epoch',
    y='eval/f1',
    color='sample',
    line_dash='text',
    line_group='model',
    markers=True,
    category_orders=plotly_category_orders,
    range_x=None,
    range_y=[range_min, 0.9],
    title=f'{model} {label} models',
    subtitle=None,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
# One tick per epoch, 1 through 5.
fig.update_xaxes(tickmode='array', tickvals=[1, 2, 3, 4, 5])
fig.show()

In [41]:
# Plot eval/f1 against train/epoch for the BigBird topic runs, drawn
# together with the five-epoch baseline frame for the same label.
model = 'BigBird'
label = 'Topic'

# Baseline frame and lower y-axis bound depend on the label being plotted.
baseline_df, range_min = (
    (baseline_five_epochs_subfield_df, 0.33)
    if label == 'Subfield'
    else (baseline_five_epochs_topic_df, 0.0)
)

# Rows for this model/label (both compared in lower case) sorted by `_step`,
# reduced to the columns of interest.
df = (
    wandb_runs_history
    .loc[lambda runs: (runs['model'] == model.lower()) & (runs['label'] == label.lower())]
    .sort_values('_step')
    .loc[
        :,
        [
            'label', 'model', 'sample', 'text', 'train/epoch',
            'eval/f1', 'eval/accuracy', 'eval/matthews_correlation',
            'is_best_epoch',
        ],
    ]
)

# Colour follows the `sample` column, dash pattern follows the `text` column.
fig = px.line(
    pd.concat([df, baseline_df]),
    x='train/epoch',
    y='eval/f1',
    color='sample',
    line_dash='text',
    line_group='model',
    markers=True,
    category_orders=plotly_category_orders,
    range_x=None,
    range_y=[range_min, 0.9],
    title=f'{model} {label} models',
    subtitle=None,
)
fig.update_layout(xaxis_title='Epochs', yaxis_title='F1')
# One tick per epoch, 1 through 5.
fig.update_xaxes(tickmode='array', tickvals=[1, 2, 3, 4, 5])
fig.show()