In [1]:
import boto3
import importlib
import json
import os
import pathlib
import sys
import wandb

import awswrangler as wr
import numpy as np
import pandas as pd

from botocore.exceptions import ClientError
from IPython.display import display

# Make the shared helper modules importable. They are looked up in two places
# relative to this notebook/script: ../01_modules and ./01_modules. Every
# location that exists is appended to sys.path.
script_dir = (
    pathlib.Path(__file__).parent.resolve()
    if '__file__' in globals()
    else pathlib.Path().absolute()  # no __file__ (e.g. in a notebook): use the working directory
)
modules_path_in_dev = os.path.abspath(os.path.join(script_dir, '..', '01_modules'))
modules_path_in_prod = os.path.abspath(os.path.join(script_dir, '01_modules'))
for _modules_path in (modules_path_in_dev, modules_path_in_prod):
    if os.path.exists(_modules_path):
        sys.path.append(_modules_path)


# Jupyter only reads a local module the first time it is imported after
# kernel start. Re-running an import cell does not pick up edits made to
# the module since then.
# As a workaround, each local module is imported and then explicitly
# re-loaded with importlib.reload, so re-running this cell always uses the
# current source. Assigning the result to `_` keeps the returned module
# object from being displayed as the cell's output.
import utils
_ = importlib.reload(utils)
import config
_ = importlib.reload(config) 

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml
config.py loaded: v0.1
utils.py loaded: v0.2.12
utils.py loaded: v0.2.12
config.py loaded: v0.1


In [2]:
# Read the Weights & Biases API key through utils.get_secret (so it is not
# hard-coded in the notebook) and log in with it.
wandb_api_key = utils.get_secret(
    secret_name='WeightsAndBiases',
    region_name=config.AWS_REGION,
)['api_key']
wandb.login(key=wandb_api_key)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/sagemaker-user/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msteve-attila-kopias[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [4]:
class WandbRuns:
    """Fetch Weights & Biases runs and collect their per-epoch evaluation history.

    On construction the runs matching ``filters`` are queried through
    ``wandb.Api`` and their histories are gathered into ``runs_history``.

    Args:
        entity: W&B entity; combined with ``project`` into ``'<entity>/<project>'``.
        project: W&B project name.
        and_tags: Optional list of tags. When given, it is sent as a
            ``{'$all': and_tags}`` filter on ``tags``.
        filters: Optional filter dict passed to ``wandb.Api().runs``. The dict
            is copied, so neither the caller's dict nor a shared default is
            ever modified. ``state`` defaults to ``'finished'`` if not supplied.

    Attributes:
        runs: Result of ``wandb.Api().runs`` for the configured filters.
        runs_history: DataFrame with one row per kept history row of every
            run (whole-number ``train/epoch`` with a non-null ``eval/loss``),
            or ``None`` when no run produced any rows. Columns are ``run_id``,
            one column per ``'key: value'`` tag, the logged ``HISTORY_KEYS``
            and ``is_best_epoch``.
        runs_details: List with one (currently empty) dict per run.
    """

    # History columns kept for every run, in output order (only those the run
    # actually logged are selected).
    HISTORY_KEYS = [
        '_step',
        '_runtime',
        '_timestamp',

        'train/epoch',
        'train/global_step',

        'train/loss',
        'eval/loss',

        'eval/f1',
        'eval/accuracy',
        'eval/matthews_correlation',
    ]

    # Columns the processing below cannot work without.
    REQUIRED_KEYS = [
        '_step',
        '_runtime',
        '_timestamp',
        'train/epoch',
        'train/loss',
        'eval/loss',
        'eval/f1',
    ]

    # Value passed as ``samples`` to ``run.history``.
    HISTORY_SAMPLES = 500

    def __init__(self, entity=config.WANDB_ENTITY, project=config.WANDB_PROJECT, and_tags=None, filters=None):
        self.entity = entity
        self.project = project
        self.and_tags = and_tags
        # Work on a private copy: a mutable ``{}`` default would be shared by
        # all instances, so 'state'/'tags' set here would leak into the next
        # WandbRuns() and into the caller's own dict.
        self.filters = dict(filters) if filters is not None else {}
        self.filters.setdefault('state', 'finished')
        # To test:
        # self.filters['config.job_name'] = 'scibert-subfield-fulltext-s100-0917214139'

        if self.and_tags is not None:
            self.filters['tags'] = {'$all': self.and_tags}

        self.root = f'{self.entity}/{self.project}'
        self.runs = None
        self.runs_history = None
        self.runs_details = None

        self.api = wandb.Api()

        self.get_runs()
        self.get_runs_history()

    def get_runs(self):
        """Query the W&B API for the runs under ``self.root`` matching ``self.filters``."""
        self.runs = self.api.runs(
            self.root,
            filters=self.filters,
        )

    def get_runs_history(self):
        """Rebuild ``runs_history`` and ``runs_details`` from ``self.runs``.

        The result is built from scratch on every call, so calling this method
        again does not duplicate rows. Runs that yield no usable rows are left
        out of ``runs_history``; ``runs_history`` is ``None`` if nothing is left.
        """
        history_frames = []
        runs_details = []
        for run in self.runs:
            history_df = self._run_history(run)
            if not history_df.empty:
                history_frames.append(history_df)

            # Placeholder for per-run metadata; nothing is collected yet.
            # Candidate fields: run.name, run.id, run.url, run.state, run.tags,
            # run.config, run.metadata, run.created_at, run.system_metrics,
            # run.history_keys, run.to_html(height=420, hidden=False).
            runs_details.append({})

        # Concatenate once instead of growing the frame inside the loop.
        self.runs_history = (
            pd.concat(history_frames, ignore_index=True) if history_frames else None
        )
        self.runs_details = runs_details

    def _run_history(self, run):
        """Return the cleaned per-epoch history of one run (possibly empty)."""
        history_df_full = run.history(
            pandas=True,
            samples=self.HISTORY_SAMPLES,
            x_axis='_epoch',
        )
        needed_cols = [key for key in self.HISTORY_KEYS if key in history_df_full.columns]
        missing_cols = [key for key in self.REQUIRED_KEYS if key not in needed_cols]
        if missing_cols:
            # One run without usable history must not abort the whole load.
            import warnings
            warnings.warn(f'Skipping run {run.id}: history lacks columns {missing_cols}')
            return pd.DataFrame()

        # .copy() gives an independent frame, so the assignments below do not
        # write into a slice of history_df_full (SettingWithCopyWarning).
        history_df = history_df_full[needed_cols].copy()
        history_df = history_df.sort_values(by=['_step', '_runtime', '_timestamp'])

        # Carry the latest train loss forward onto the rows that lack one.
        history_df['train/loss'] = history_df['train/loss'].ffill()

        # Keep only rows logged at a whole-number epoch that have an eval loss.
        epoch = history_df['train/epoch']
        history_df = history_df[epoch.notna() & (epoch == epoch.round())].copy()
        history_df['train/epoch'] = history_df['train/epoch'].astype('Int64')
        history_df = history_df[history_df['eval/loss'].notna()].copy()

        history_df['run_id'] = run.id
        tag_keys = []
        for tag in run.tags:
            # Tags are expected as 'key: value'. Split on the first separator
            # only, and skip tags without one instead of raising.
            tag_key, separator, tag_value = tag.partition(': ')
            if not separator or tag_key == 'instance':
                continue
            if tag_key in history_df.columns:
                # Never overwrite a metric column or an earlier tag.
                continue
            history_df[tag_key] = tag_value
            tag_keys.append(tag_key)

        history_df = history_df[['run_id'] + tag_keys + needed_cols].copy()

        history_df['is_best_epoch'] = False
        eval_f1 = history_df['eval/f1']
        # idxmax needs at least one non-null value to return a usable label.
        if eval_f1.notna().any():
            history_df.loc[eval_f1.idxmax(), 'is_best_epoch'] = True

        return history_df

# Instantiating WandbRuns queries the W&B API and loads the history of every
# matching run (both happen in __init__), so this cell can take a while.
wandb_log = WandbRuns()

In [5]:
# Best epoch of every run trained on the 100% sample with the 'topic' label,
# excluding runs whose text input is 'title', ranked by eval F1 (best first).
(
    wandb_log.runs_history
    .loc[
        lambda history: (
            history['is_best_epoch'].eq(True)
            & history['text'].ne('title')
            & history['label'].eq('topic')
            & history['sample'].eq('100%')
        )
    ]
    .sort_values(by='eval/f1', ascending=False)
)

Unnamed: 0,run_id,label,model,sample,text,_step,_runtime,_timestamp,train/epoch,train/global_step,train/loss,eval/loss,eval/f1,eval/accuracy,eval/matthews_correlation,is_best_epoch
442,scibert-topic-abstract-s100-0917214440-y0us2l-...,topic,scibert,100%,abstract,31,13585.782862,1758159000.0,3,14568,0.7947,1.205999,0.706871,0.706871,0.703856,True
694,longformer-topic-abstract-s100-0918102051-o64s...,topic,longformer,100%,abstract,52,80698.401466,1758279000.0,5,24280,0.6323,1.231289,0.704437,0.704437,0.701366,True
419,roberta-topic-abstract-s100-0917145422-wwrsxz-...,topic,roberta,100%,abstract,28,12647.500345,1758154000.0,5,12140,0.7266,1.228225,0.703873,0.703873,0.700799,True
526,ModernBERT-topic-abstract-s100-0918004324-7q71...,topic,ModernBERT,100%,abstract,20,15729.939139,1758172000.0,2,9712,0.915,1.161514,0.703206,0.703206,0.70006,True
608,bigbird-topic-abstract-s100-0918020047-zn8l1y-...,topic,bigbird,100%,abstract,22,20557.221457,1758184000.0,4,9712,0.8382,1.223253,0.702745,0.702745,0.69964,True
724,deberta-topic-abstract-s100-0918145257-mccxcr-...,topic,deberta,100%,abstract,28,22263.979882,1758245000.0,5,12140,0.8016,1.276827,0.700823,0.700823,0.697723,True
77,bert-topic-abstract-s100-0917032639-7bunqv-algo-1,topic,bert,100%,abstract,31,13538.188686,1758093000.0,3,14568,0.8994,1.234328,0.700515,0.700515,0.697397,True
302,distilbert-topic-abstract-s100-0917115955-sp0l...,topic,distilbert,100%,abstract,31,7255.338266,1758137000.0,3,14568,0.9168,1.219088,0.696594,0.696594,0.693465,True
432,scibert-topic-fulltext-s100-0917213838-zy7a39-...,topic,scibert,100%,fulltext,31,14376.016369,1758160000.0,3,14568,0.9994,1.37829,0.671374,0.671374,0.667955,True
701,ModernBERT-topic-fulltext-s100-0918144354-4c5g...,topic,ModernBERT,100%,fulltext,10,9236.907684,1758217000.0,2,4856,1.1846,1.34499,0.66858,0.66858,0.665092,True
