In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.io as pio

In [2]:
# Load the experiment log and parse its timestamp column.
df = pd.read_csv('../experiments/data.csv')
df['datetime'] = pd.to_datetime(df['datetime'])

# Elapsed time in hours, measured from each experiment's earliest timestamp.
# The per-experiment minimum is broadcast back onto the rows with the built-in
# 'min' transform, so the subtraction is vectorised instead of calling a
# Python lambda once per group.
experiment_start = df.groupby('experiment')['datetime'].transform('min')
df['time_from_start'] = (df['datetime'] - experiment_start).dt.total_seconds() / 3600


In [3]:

# Coarsen the two time columns before they are used as pivot index keys:
# timestamps to whole seconds, elapsed hours to two decimals.
# (presumably so values logged at nearly the same moment share one row - TODO confirm)
df['datetime'] = df['datetime'].dt.round('s')
df['time_from_start'] = df['time_from_start'].round(2)

# Long -> wide: one row per (datetime, time_from_start, experiment) and one
# column per tag; if a cell has several values, the first one is kept.
pivot_df = pd.pivot_table(
    df,
    index=['datetime', 'time_from_start', 'experiment'],
    columns='tag',
    values='value',
    aggfunc='first',
).reset_index()

# Drop the leftover 'tag' label from the column axis.
pivot_df = pivot_df.rename_axis(columns=None)

In [4]:
# Swap the raw experiment identifiers for the display names used in the figures.
experiment_labels = {
    'exp1': 'Standard Network',
    'exp2': 'Small Network',
    'exp3': 'Large Network',
    'transformer_run': 'Standard Network w. New Transformer',
}
pivot_df['experiment'] = pivot_df['experiment'].replace(experiment_labels)

In [5]:
# Distinct metric tags present in the log (the candidate columns for plotting)
pd.unique(df['tag'])

array(['Loss/train', 'Loss/val', 'Accuracy/train', 'Accuracy/val',
       'BLEU/val', 'LR'], dtype=object)

In [10]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# Two-panel figure: validation accuracy (left) and training loss (right),
# one line per experiment, with a single shared legend.

# Shared x-axis window, in hours of training time.
x_range_hours = [-0.1, 4]

# (pivot_df column to plot, subplot column, showlegend value).
# None leaves plotly's default for the first panel; the second panel is hidden
# from the legend so each experiment is listed only once.
panels = [
    ('Accuracy/val', 1, None),
    ('Loss/train', 2, False),
]

# Create subplots with 1 row and 2 columns
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Model Accuracy Over Time', 'Training Loss Over Time'),
    horizontal_spacing=0.05
)

# One colour per experiment, reused in both panels. The palette is indexed
# modulo its length so that having more experiments than palette entries
# recycles colours instead of raising IndexError.
experiments = pivot_df['experiment'].unique()
palette = px.colors.qualitative.Plotly

for i, experiment in enumerate(experiments):
    exp_data = pivot_df[pivot_df['experiment'] == experiment]
    color = palette[i % len(palette)]

    for metric, subplot_col, show_legend in panels:
        fig.add_trace(
            go.Scatter(
                x=exp_data['time_from_start'],
                y=exp_data[metric],
                mode='lines+markers',
                name=experiment,
                line=dict(color=color),
                # Same group in both panels, so one legend click toggles both traces.
                legendgroup=experiment,
                showlegend=show_legend,
            ),
            row=1, col=subplot_col
        )

# Figure size and legend in one call (the legend is positioned near the
# top right, over the right-hand subplot).
fig.update_layout(
    width=2000,  # wide canvas for two side-by-side subplots
    height=560,
    legend=dict(
        x=.85, y=1,
        xanchor='left',
        yanchor='top',
        title_text='Experiments',
        orientation='v'
    )
)

# Both panels share the same x-axis title and range, so update every x-axis at once.
fig.update_xaxes(
    title_text='Training Time (hours)',
    range=x_range_hours
)

# Update y-axes
fig.update_yaxes(
    title_text='Validation Accuracy',
    row=1, col=1
)
fig.update_yaxes(
    title_text='Training Loss',
    row=1, col=2
)

# Explanatory label on the first subplot (x/y are hand-picked positions
# on that subplot's axes).
fig.add_annotation(
    x=1.9, y=0.6,
    text="Each marker represents<br>an epoch of training",
    showarrow=False,
    font=dict(size=12, color="black"),
    row=1, col=1
)

# Arrow on the first subplot pointing at (x, y); ax/ay offset the arrow tail.
fig.add_annotation(
    x=1.84, y=0.635,
    ax=-5, ay=30,
    showarrow=True,
    arrowhead=2,
    arrowsize=1,
    arrowwidth=2,
    arrowcolor='black',
    font=dict(size=12, color="black"),
    row=1, col=1
)

# Export a static copy for the presentation, then render inline.
pio.write_image(fig, 'presentation_dual_1.jpg', 'jpg', scale=1)

fig.show()

## Creating Table
Use this table if/when we want to compare model parameters in the paper.

In [None]:
import os
import json

def load_experiment_args(dir):
    """Load the saved arguments of a single experiment.

    Parameters
    ----------
    dir : str or os.PathLike
        Experiment directory that may contain an ``args.json`` file.
        (The name shadows the ``dir`` builtin; it is kept so existing
        keyword callers continue to work.)

    Returns
    -------
    object or None
        The decoded content of ``<dir>/args.json``, or ``None`` when that
        path does not resolve to a file.

    Raises
    ------
    json.JSONDecodeError
        If the file exists but does not contain valid JSON.
    """
    args_file = os.path.join(dir, 'args.json')
    try:
        # Open directly instead of checking os.path.exists() first: the file
        # could disappear between the check and the open. JSON is decoded as
        # UTF-8 explicitly rather than with the platform's locale encoding.
        with open(args_file, 'r', encoding='utf-8') as f:
            return json.load(f)
    except (FileNotFoundError, NotADirectoryError):
        # Missing file, missing directory, or `dir` is not a directory.
        return None


In [None]:
# Collect the saved arguments of every experiment folder, keyed by folder name.
experiments_dir = '../experiments'
experiment_args = {}
for entry in os.listdir(experiments_dir):
    entry_path = os.path.join(experiments_dir, entry)
    # Only sub-directories are experiment runs; skip loose files.
    if not os.path.isdir(entry_path):
        continue
    loaded_args = load_experiment_args(entry_path)
    # Skip folders whose args are missing (None) or empty.
    if loaded_args:
        experiment_args[entry] = loaded_args

In [None]:
# One row per experiment folder, one column per saved argument.
args_df = pd.DataFrame.from_dict(experiment_args, orient='index')

# Raw argument name -> row label shown in the table (dict order = row order).
parameter_labels = {
    'encoder_d': 'En/Decoder Dimensions',
    'num_layers': 'Number of Layers',
    'batch_size': 'Batch Size',
    'pretrained': 'Pretrained Model?',
}
# Experiment folder -> display name.
network_labels = {
    'exp1': 'Standard Network',
    'exp2': 'Small Network',
    'exp3': 'Large Network',
    'transformer_run': 'Standard Network w. New Transformer',
}
# Column order chosen to match the plot.
plot_order = ['Small Network', 'Standard Network', 'Large Network', 'Standard Network w. New Transformer']

present = (
    args_df.T
    .loc[list(parameter_labels), list(network_labels)]
    .rename(index=parameter_labels, columns=network_labels)
    .loc[:, plot_order]
)

# Show with experiments as rows and parameters as columns.
present.T

Unnamed: 0,En/Decoder Dimensions,Number of Layers,Batch Size,Pretrained Model?
Small Network,64,1,128,False
Standard Network,256,2,32,False
Large Network,512,3,32,True
Standard Network w. New Transformer,256,3,32,True
