In [157]:
import pandas
import numpy as np
from glob import glob
import altair as alt

def plot_action_value_by_episode_and_reset(df, expected_counts):
    """Plot mean action values per episode, without resets and with reset frequency 100.

    Builds two side-by-side Altair line charts. The left chart uses the rows of
    ``df`` where ``weight_reset == 0``, the right chart the rows where
    ``weight_reset == 100``. Each chart draws one line per action
    (``q_left``, ``q_down``, ``q_right``, ``q_up``) showing that column averaged
    over all rows that share the same ``episode``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``learning_rate``, ``weight_reset``, ``episode``,
        ``reward``, ``q_left``, ``q_down``, ``q_right`` and ``q_up``. The learning
        rate shown in the titles is taken from the first row only, so callers are
        expected to pass a frame already filtered to one learning rate.
    expected_counts : number
        Expected value of (non-null ``reward`` rows per ``weight_reset`` group) / 1000
        for every group in ``df``. Presumably the number of trials, with 1000
        episodes per trial -- confirm against the experiment setup.

    Returns
    -------
    Altair chart
        Horizontal concatenation of the two line charts with title/axis fonts configured.

    Raises
    ------
    AssertionError
        If the largest or the smallest per-group count differs from ``expected_counts``.
    """
    lr = df['learning_rate'].iloc[0]

    # Sanity check on the caller's filter. Both extremes must be compared against
    # the expectation; the message belongs after the comma, not inside the condition.
    rows_per_reset = df.groupby('weight_reset')['reward'].count() / 1000
    assert (
        rows_per_reset.max() == expected_counts
        and rows_per_reset.min() == expected_counts
    ), "sanity check on filter"

    action_columns = ['q_left', 'q_down', 'q_right', 'q_up']

    def _mean_action_values(reset_frequency):
        """Return a long-form (Action, Episode, Value) frame for one reset frequency."""
        selected = df[df['weight_reset'] == reset_frequency]
        # Select the Q-value columns before averaging so unrelated columns are
        # never aggregated; unstack() turns the 4 columns into (column, episode) rows.
        long_form = selected.groupby('episode')[action_columns].mean().unstack().reset_index()
        long_form.columns = ['Action', 'Episode', 'Value']
        # 'q_left' -> 'Left', etc., for the legend.
        long_form['Action'] = long_form['Action'].map(lambda name: name.replace('q_', '').title())
        return long_form

    # Left panel: no weight resets. Only this panel customises the legend,
    # which is positioned manually (orient='none' with legendX/legendY).
    no_reset_chart = alt.Chart(_mean_action_values(0)).encode(
        alt.Color("Action").legend(alt.Legend(title=None, symbolStrokeWidth=10, labelFontSize=14, orient='none', legendX=700, legendY=50))
    ).properties(
        title='Action Value by Episode (LR={})'.format(lr)
    )
    no_reset_lines = no_reset_chart.mark_line().encode(
        x="Episode:Q",
        y="Value:Q"
    )

    # Right panel: weight_reset == 100.
    reset_chart = alt.Chart(_mean_action_values(100)).encode(
        alt.Color("Action")
    ).properties(
        title='Action Value by Episode With Resets (LR={})'.format(lr)
    )
    reset_lines = reset_chart.mark_line().encode(
        x="Episode:Q",
        y="Value:Q"
    )

    return alt.hconcat(no_reset_lines, reset_lines).configure_title(fontSize=18).configure_axis(titleFontSize=16, labelFontSize=14)
    

def plot_reward_by_episode_and_reset(df, expected_counts):
    """Plot mean reward per episode for reset frequencies 0 and 100.

    Rows of ``df`` with ``weight_reset`` in ``{0, 100}`` are averaged per
    (``episode``, ``weight_reset``) pair and drawn as one line per reset
    frequency in a single Altair chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``learning_rate``, ``weight_reset``, ``episode``
        and ``reward``. The learning rate shown in the title is taken from the
        first row only.
    expected_counts : number
        Expected value of (non-null ``reward`` rows per ``weight_reset`` group) / 1000.
        The check covers every ``weight_reset`` value present in ``df``, not only
        the two that are plotted. Presumably the number of trials, with 1000
        episodes per trial -- confirm against the experiment setup.

    Returns
    -------
    Altair chart
        Line chart with axis and title fonts configured.

    Raises
    ------
    AssertionError
        If the largest or the smallest per-group count differs from ``expected_counts``.
    """
    lr = df['learning_rate'].iloc[0]

    # Sanity check on the caller's filter. Both extremes must be compared against
    # the expectation; the message belongs after the comma, not inside the condition.
    rows_per_reset = df.groupby('weight_reset')['reward'].count() / 1000
    assert (
        rows_per_reset.max() == expected_counts
        and rows_per_reset.min() == expected_counts
    ), "sanity check on filter"

    compared = df[df['weight_reset'].isin([0, 100])]
    # Pick 'reward' before averaging so the other columns are never aggregated.
    source = compared.groupby(['episode', 'weight_reset'])['reward'].mean().reset_index()
    source.columns = ['Episode', 'Reset', 'Reward']

    # Shared chart object: colour by reset frequency, fixed width.
    chart = alt.Chart(source).encode(
        alt.Color("Reset:N").legend(alt.Legend(titleFontSize=16, symbolStrokeWidth=10, labelFontSize=14))
    ).properties(
        title='Reward Overtime (LR={})'.format(lr),
        width=400
    )

    return chart.mark_line().encode(
        x="Episode:Q",
        y="Reward:Q"
    ).configure_axis(titleFontSize=16, labelFontSize=14).configure_title(fontSize=18)


def plot_hyperparams(df, expected_counts):
    """Bar charts of mean reward per reset frequency, faceted by replay ratio.

    ``df`` is averaged per (``replay_ratio``, ``weight_reset``) pair. For every
    replay ratio one facet column is drawn, holding one bar per reset frequency
    with the mean reward printed above the bar.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``learning_rate``, ``replay_ratio``,
        ``weight_reset`` and ``reward``. The learning rate of the first row is
        used both for the title and to filter the rows that are plotted.
    expected_counts : number
        Expected value of (non-null ``reward`` rows per
        (``replay_ratio``, ``weight_reset``) group) / 1000. Presumably the number
        of trials, with 1000 episodes per trial -- confirm against the experiment setup.

    Returns
    -------
    Altair chart
        Faceted, layered bar + text chart with fonts configured.

    Raises
    ------
    AssertionError
        If the largest or the smallest per-group count differs from ``expected_counts``.
    """
    lr = df['learning_rate'].iloc[0]

    # Sanity check on the caller's filter. Both extremes must be compared against
    # the expectation; the message belongs after the comma, not inside the condition.
    rows_per_group = df.groupby(['replay_ratio', 'weight_reset'])['reward'].count() / 1000
    assert (
        rows_per_group.max() == expected_counts
        and rows_per_group.min() == expected_counts
    ), "sanity check on filter"

    source = df[df['learning_rate'] == lr].groupby(['replay_ratio', 'weight_reset']).mean().reset_index()
    # Human-readable facet header and legend/axis title.
    source['formatted_replay_ratio'] = 'Replay Ratio: ' + source['replay_ratio'].astype(str)
    source['Reset Frequency'] = source['weight_reset']

    # Shared base for both layers: aggregate in the chart spec, then expose the
    # aggregated value under a label field (the text layer overrides this field).
    base_chart = alt.Chart(source).transform_aggregate(
        mean_reward='mean(reward)',
        groupby=['Reset Frequency', 'formatted_replay_ratio']
    ).transform_calculate(
        mean_reward_label="datum.mean_reward"
    )

    # Bars: fixed y domain [0, 0.7].
    bar_chart = base_chart.mark_bar().encode(
        x='Reset Frequency:N',
        y=alt.Y('mean_reward:Q', title='Mean Reward', scale=alt.Scale(domain=[0, 0.7])),
        color='Reset Frequency:N'
    ).properties(width=200)

    # Text labels displayed above the bars.
    text_chart = base_chart.mark_text(
        align='center',
        baseline='bottom',
        fontSize=16,
        dy=0  # Adjusts the position of the text above the bars
    ).encode(
        x='Reset Frequency:N',
        y=alt.Y('mean_reward:Q', title='Mean Reward'),
        text='mean_reward_label:Q'
    ).transform_calculate(
        mean_reward_label="format(datum.mean_reward, '.2f')"  # Format numbers to two decimal places
    ).properties(width=200)

    # Layer bars and labels, then facet the layered chart by replay ratio.
    faceted = alt.layer(bar_chart, text_chart).facet(
        column=alt.Column('formatted_replay_ratio:O', title=None, header=alt.Header(labelFontSize=16), sort=alt.EncodingSortField(field="replay_ratio", order="ascending")),
    )

    return faceted.properties(title="Average Reward After 1000 Episodes, Learning Rate = {}".format(lr)).configure_title(fontSize=18).configure_axis(titleFontSize=14, labelFontSize=12).configure_header(titleFontSize=18).configure_legend(labelFontSize=14, titleFontSize=16)


def plot_model_comparison(df, expected_counts):
    """Bar charts of mean reward per reset frequency, faceted by model architecture.

    ``df`` is averaged per (``model``, ``weight_reset``) pair. For every model one
    facet column is drawn, holding one bar per reset frequency with the mean
    reward printed above the bar. Model value ``1`` is labelled "3 Layer" and
    ``2`` is labelled "2 Layer".

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``learning_rate``, ``model``, ``weight_reset``
        and ``reward``. The learning rate shown in the title is taken from the
        first row only. The title also hard-codes "Replay Ratio 16", so callers
        are expected to pass a frame filtered to that replay ratio.
    expected_counts : number
        Expected value of (non-null ``reward`` rows per
        (``model``, ``weight_reset``) group) / 1000. Presumably the number of
        trials, with 1000 episodes per trial -- confirm against the experiment setup.

    Returns
    -------
    Altair chart
        Faceted, layered bar + text chart with fonts configured.

    Raises
    ------
    AssertionError
        If the largest or the smallest per-group count differs from ``expected_counts``.
    """
    lr = df['learning_rate'].iloc[0]

    # Sanity check on the caller's filter. Both extremes must be compared against
    # the expectation; the message belongs after the comma, not inside the condition.
    rows_per_group = df.groupby(['model', 'weight_reset'])['reward'].count() / 1000
    assert (
        rows_per_group.max() == expected_counts
        and rows_per_group.min() == expected_counts
    ), "sanity check on filter"

    source = df.groupby(['model', 'weight_reset']).mean().reset_index()
    # The facet field keeps the name 'formatted_replay_ratio' although it holds
    # the architecture label here.
    source['formatted_replay_ratio'] = 'Architecture: ' + source['model'].replace(1, '3 Layer').replace(2, '2 Layer')
    source['Reset Frequency'] = source['weight_reset']

    # Shared base for both layers: aggregate in the chart spec, then expose the
    # aggregated value under a label field (the text layer overrides this field).
    base_chart = alt.Chart(source).transform_aggregate(
        mean_reward='mean(reward)',
        groupby=['Reset Frequency', 'formatted_replay_ratio']
    ).transform_calculate(
        mean_reward_label="datum.mean_reward"
    )

    # Bars: fixed y domain [0, 0.8].
    bar_chart = base_chart.mark_bar().encode(
        x='Reset Frequency:N',
        y=alt.Y('mean_reward:Q', title='Mean Reward', scale=alt.Scale(domain=[0, 0.8])),
        color='Reset Frequency:N'
    ).properties(width=200)

    # Text labels displayed above the bars.
    text_chart = base_chart.mark_text(
        align='center',
        baseline='bottom',
        fontSize=16,
        dy=0  # Adjusts the position of the text above the bars
    ).encode(
        x='Reset Frequency:N',
        y=alt.Y('mean_reward:Q', title='Mean Reward'),
        text='mean_reward_label:Q'
    ).transform_calculate(
        mean_reward_label="format(datum.mean_reward, '.2f')"  # Format numbers to two decimal places
    ).properties(width=200)

    # Layer bars and labels, then facet the layered chart by architecture label.
    faceted = alt.layer(bar_chart, text_chart).facet(
        column=alt.Column('formatted_replay_ratio:O', title=None, header=alt.Header(labelFontSize=16), sort=alt.EncodingSortField(field="replay_ratio", order="ascending")),
    )

    return faceted.properties(title="Average Reward After 1000 Episodes, Learning Rate = {}, Replay Ratio 16".format(lr)).configure_title(fontSize=18).configure_axis(titleFontSize=14, labelFontSize=12).configure_header(titleFontSize=18).configure_legend(labelFontSize=14, titleFontSize=16)


def plot_model_compare_overtime(df, expected_counts):
    """Plot mean reward per episode with one line per model architecture.

    ``df`` is averaged per (``episode``, ``model``) pair and drawn as a single
    Altair line chart coloured by model. Model value ``1`` is labelled
    "3 Layer" and ``2`` is labelled "2 Layer".

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``learning_rate``, ``model``, ``episode`` and
        ``reward``. The learning rate shown in the title is taken from the first
        row only.
    expected_counts : number
        Expected value of (non-null ``reward`` rows per ``model`` group) / 1000.
        Presumably the number of trials, with 1000 episodes per trial -- confirm
        against the experiment setup.

    Returns
    -------
    Altair chart
        Line chart with axis and title fonts configured.

    Raises
    ------
    AssertionError
        If the largest or the smallest per-group count differs from ``expected_counts``.
    """
    lr = df['learning_rate'].iloc[0]

    # Sanity check on the caller's filter. Both extremes must be compared against
    # the expectation; the message belongs after the comma, not inside the condition.
    rows_per_model = df.groupby(['model'])['reward'].count() / 1000
    assert (
        rows_per_model.max() == expected_counts
        and rows_per_model.min() == expected_counts
    ), "sanity check on filter"

    # Pick 'reward' before averaging so the other columns are never aggregated.
    source = df.groupby(['episode', 'model'])['reward'].mean().reset_index()
    source['model'] = 'Architecture: ' + source['model'].replace(1, '3 Layer').replace(2, '2 Layer')
    source.columns = ['Episode', 'Model', 'Reward']

    # Shared chart object: colour by architecture, fixed width. The title uses the
    # learning rate read from the data instead of a hard-coded 0.01.
    chart = alt.Chart(source).encode(
        alt.Color("Model:N").legend(alt.Legend(titleFontSize=16, symbolStrokeWidth=10, labelFontSize=14))
    ).properties(
        title='Reward Overtime (LR={})'.format(lr),
        width=400
    )

    return chart.mark_line().encode(
        x="Episode:Q",
        y="Reward:Q"
    ).configure_axis(titleFontSize=16, labelFontSize=14).configure_title(fontSize=18)

In [127]:
# Must be set manually. Used as the sanity-check value by every plotting function:
# 100 trials were run, so every aggregated bin should count == 100.
expected_counts = 100
lr = 0.01

# Sort the paths so the concatenation order does not depend on directory listing order.
cache_files = sorted(glob("cache/*.p"))
assert cache_files, "no cached result pickles found under cache/"

# NOTE: read_pickle executes arbitrary code embedded in the file; only load
# pickles produced by your own runs.
# The file imports the library as `pandas` (no `pd` alias), so use that name here.
df = pandas.concat([pandas.read_pickle(path) for path in cache_files])

In [144]:
# Model 1 at the configured learning rate and replay ratio 16; the sanity-check
# value comes from the config cell instead of a repeated literal.
plot_action_value_by_episode_and_reset(
    df[(df['model'] == 1) & (df['learning_rate'] == lr) & (df['replay_ratio'] == 16)],
    expected_counts,
)

In [145]:
# Model 1 at the configured learning rate and replay ratio 16; the sanity-check
# value comes from the config cell instead of a repeated literal.
plot_reward_by_episode_and_reset(
    df[(df['model'] == 1) & (df['learning_rate'] == lr) & (df['replay_ratio'] == 16)],
    expected_counts,
)

In [146]:
# Model 1 at the configured learning rate, all replay ratios; the sanity-check
# value comes from the config cell instead of a repeated literal.
plot_hyperparams(
    df[(df['model'] == 1) & (df['learning_rate'] == lr)],
    expected_counts,
)

In [147]:
# All models at replay ratio 16 and the configured learning rate; the sanity-check
# value comes from the config cell instead of a repeated literal.
plot_model_comparison(
    df[(df['replay_ratio'] == 16) & (df['learning_rate'] == lr)],
    expected_counts,
)

In [148]:
# All models at reset frequency 10, replay ratio 16 and the configured learning
# rate; the sanity-check value comes from the config cell instead of a repeated literal.
plot_model_compare_overtime(
    df[(df['learning_rate'] == lr) & (df['weight_reset'] == 10) & (df['replay_ratio'] == 16)],
    expected_counts,
)

In [156]:
# Alternative view: the full grid search for model 1 in one faceted bar chart
# (one row per learning rate, one column per replay ratio).
# I couldn't figure out how to overlay text labels on this version.
alt.Chart(
    df[df['model'] == 1]
    .groupby(['weight_reset', 'learning_rate', 'replay_ratio'])
    .mean()
    .reset_index(),
    width=200,
    height=150,
).mark_bar().encode(
    y=alt.Y('reward:Q', title='Mean Reward', scale=alt.Scale(domain=[0, 0.7])),
    x=alt.X("weight_reset:N"),
    color=alt.Color("weight_reset:N").title("Reset Frequency").legend(orient="top", titleOrient="left"),
    row=alt.Row("learning_rate:Q").title("Learning Rate").header(labelAngle=0),
    column=alt.Column("replay_ratio:N").title("Replay Ratio"),
)