In [1]:
import neptune
import pandas as pd
from typing import List, Dict
from functools import partial
import plotly.express as px

In [2]:
# Open the Neptune project in read-only mode: this notebook only queries
# existing runs and must not be able to modify them.
project = neptune.init_project(project="pmtest/llm-random", mode="read-only")

https://app.neptune.ai/pmtest/llm-random/


#### Plot loss vs. the `c` hyperparameter for the post_add, std_norm v1 model

In [3]:
def fetch_runs_table_add_series(df: pd.DataFrame, neptune_series: List[str]) -> pd.DataFrame:
    records = []
    for _, row in df.iterrows():
        sys_id = row["sys/id"]
        series_row = [sys_id]
        run = neptune.init_run(with_id=sys_id, mode="read-only")
        for series in neptune_series:
            step_value = run[series].fetch_values()
            series_row.append([{'step': s, 'value': v} for s, v in zip(step_value.step, step_value.value)])
        records.append(series_row)
    
    extrend_df = pd.DataFrame(records, columns=["sys/id"] + neptune_series)
    return pd.merge(df, extrend_df, on="sys/id")
        

In [4]:

# Runs compared in this section. The previously listed "LMRANDOM-13812" was a
# typo of "LLMRANDOM-13812" (which is already present) and matched no run, so
# it has been removed.
run_df = project.fetch_runs_table(
    id=[
        "LLMRANDOM-13806",
        "LLMRANDOM-13949",
        "LLMRANDOM-13813",
        "LLMRANDOM-13807",
        "LLMRANDOM-13756",
        "LLMRANDOM-13812",
    ],
    columns=['sys/failed', 'sys/id', 'sys/name']
    ).to_pandas()

# Add a "loss" column holding each run's full list of logged loss points.
extended_df = fetch_runs_table_add_series(run_df, ["loss"])

https://app.neptune.ai/pmtest/llm-random/e/LLMRANDOM-13949
https://app.neptune.ai/pmtest/llm-random/e/LLMRANDOM-13813
https://app.neptune.ai/pmtest/llm-random/e/LLMRANDOM-13812
https://app.neptune.ai/pmtest/llm-random/e/LLMRANDOM-13807
https://app.neptune.ai/pmtest/llm-random/e/LLMRANDOM-13806
https://app.neptune.ai/pmtest/llm-random/e/LLMRANDOM-13756


In [5]:
# Display the runs table together with the fetched "loss" series column.
extended_df

Unnamed: 0,sys/failed,sys/id,sys/name,loss
0,False,LLMRANDOM-13949,gn_c_grid_exp_4_post_add_v1_c_neg_0_1 medium_g...,"[{'step': 160.0, 'value': 8.784871664643287}, ..."
1,False,LLMRANDOM-13813,gn_c_grid_exp_0_post_add_v1_c_0_8 medium_grad_...,"[{'step': 160.0, 'value': 9.31532644033432}, {..."
2,False,LLMRANDOM-13812,gn_c_grid_exp_0_post_add_v1_c_0_6 medium_grad_...,"[{'step': 160.0, 'value': 9.319578731060028}, ..."
3,False,LLMRANDOM-13807,gn_c_grid_exp_0_post_add_v1_c_0_2 medium_grad_...,"[{'step': 160.0, 'value': 9.175372809171677}, ..."
4,False,LLMRANDOM-13806,std_grad_norm_sanity_check medium_grad_norm_st...,"[{'step': 160.0, 'value': 8.435521373152733}, ..."
5,False,LLMRANDOM-13756,grad_norm_formulas_exp_9_post_add_v1 medium_gr...,"[{'step': 160.0, 'value': 9.322496002912521}, ..."


In [6]:
# Seed the running intersection with the steps logged by the first run.
step_intersection = {point['step'] for point in extended_df.iloc[0]['loss']}
# Keep only the steps that every run has a loss value for.
for steps in extended_df["loss"]:
    step_intersection &= {point['step'] for point in steps}

# Runs are compared at the latest step they all have in common.
cmp_step = int(max(step_intersection))

In [7]:
def find_matching_step_idx(steps: List[Dict[str, float]], cmp_step: float) -> int:
    """Return the position of the first point in ``steps`` logged at ``cmp_step``.

    Args:
        steps: Series points, each a dict with at least a ``'step'`` key.
        cmp_step: Step number to look for (compared with ``==``).

    Returns:
        Index into ``steps`` of the first point whose ``'step'`` equals
        ``cmp_step``.

    Raises:
        ValueError: If no point in ``steps`` was logged at ``cmp_step``.
    """
    for i, point in enumerate(steps):
        if point['step'] == cmp_step:
            return i
    # Fail loudly here instead of implicitly returning None, which would only
    # surface later as an opaque TypeError when the result is used as an index.
    raise ValueError(f"step {cmp_step!r} not found among {len(steps)} logged points")

In [8]:
# For each run, locate the position of the common comparison step inside its
# list of loss points.
extended_df['cmp_loss_idx'] = extended_df['loss'].apply(
    lambda loss_points: find_matching_step_idx(loss_points, cmp_step=cmp_step)
)

In [9]:
# Pull out each run's loss value at the comparison step, using the index
# computed in 'cmp_loss_idx'.
extended_df[f'loss_{cmp_step}'] = [
    loss_points[idx]['value']
    for loss_points, idx in zip(extended_df['loss'], extended_df['cmp_loss_idx'])
]

In [10]:
# Show the full (untruncated) run names; they are the keys of the name -> c mapping.
extended_df['sys/name'].tolist()

['gn_c_grid_exp_4_post_add_v1_c_neg_0_1 medium_grad_norm_std_norm_post_add_v1_c_grid 24_09-12_09:57:11ng',
 'gn_c_grid_exp_0_post_add_v1_c_0_8 medium_grad_norm_std_norm_post_add_v1_c_grid 24_09-11_23:43:56SQ',
 'gn_c_grid_exp_0_post_add_v1_c_0_6 medium_grad_norm_std_norm_post_add_v1_c_grid 24_09-11_23:43:56iQ',
 'gn_c_grid_exp_0_post_add_v1_c_0_2 medium_grad_norm_std_norm_post_add_v1_c_grid 24_09-11_19:45:35XQ',
 'std_grad_norm_sanity_check medium_grad_norm_std_norm_all_sanity_check 24_09-11_19:21:046g',
 'grad_norm_formulas_exp_9_post_add_v1 medium_grad_norm_std_norm_post_add_v1 24_09-10_12:47:15Ug']

In [11]:
# Manual mapping from the full run name to the value of the `c` hyperparameter.
# For the c_grid runs the value mirrors the `c_...` suffix in the name; the
# values for the last two runs are not encoded in their names.
name_c_map = {
    'gn_c_grid_exp_4_post_add_v1_c_neg_0_1 medium_grad_norm_std_norm_post_add_v1_c_grid 24_09-12_09:57:11ng': -0.1,
    'gn_c_grid_exp_0_post_add_v1_c_0_8 medium_grad_norm_std_norm_post_add_v1_c_grid 24_09-11_23:43:56SQ': 0.8,
    'gn_c_grid_exp_0_post_add_v1_c_0_6 medium_grad_norm_std_norm_post_add_v1_c_grid 24_09-11_23:43:56iQ': 0.6,
    'gn_c_grid_exp_0_post_add_v1_c_0_2 medium_grad_norm_std_norm_post_add_v1_c_grid 24_09-11_19:45:35XQ': 0.2,
    'std_grad_norm_sanity_check medium_grad_norm_std_norm_all_sanity_check 24_09-11_19:21:046g': 0.0,
    'grad_norm_formulas_exp_9_post_add_v1 medium_grad_norm_std_norm_post_add_v1 24_09-10_12:47:15Ug': 1.0,
}

# Direct indexing (rather than a NaN-filling lookup) so that an unmapped run
# name raises KeyError instead of silently producing a missing value.
extended_df['c'] = [name_c_map[run_name] for run_name in extended_df['sys/name']]

In [12]:
# Line plot of the loss at the common comparison step against `c`; rows are
# sorted by `c` so the line is drawn left to right, and each point is labelled
# with its run id.
fig = px.line(
    extended_df.sort_values(by='c'),
    x="c",
    y=f"loss_{cmp_step}",
    text='sys/id',
    title=f"c vs loss (step {cmp_step})",
)
fig.update_traces(textposition="bottom right")
fig.show()