In [None]:
import numpy as np
import wandb
from matplotlib import pyplot as plt
from util import populate_plt_settings, get_column_width, get_fig_size, get_latex_float
import re
import plotly.express as px
import pandas as pd

In [None]:
# Apply the project's matplotlib settings (helper imported from util) before any figure is created.
# NOTE(review): presumably this configures rcParams for publication-style output — confirm in util.
populate_plt_settings(plt)

In [None]:
# Open the W&B public API and pick the run whose logged history is analysed below.
api = wandb.Api()
# Other runs kept for reference; swap the active line to analyse one of them instead.
# run = api.run('kennychufk/alluvion-rl/e290336mAuga')
# run = api.run('kennychufk/alluvion-rl/76dyno5g')
run = api.run('kennychufk/alluvion-rl/2amh43ojAugb')


In [None]:
# Fetch the logged history of the selected run; no key or step filter is applied.
# scan_history() returns a lazy scan object. It is materialized into a list once so that
# later cells can iterate over the rows repeatedly without querying the W&B backend again
# on every pass (and without depending on the scan object being re-iterable).
history = list(run.scan_history(keys=None,
                                page_size=1000,
                                min_step=None,
                                max_step=None))

In [None]:
# Metric keys of interest look like '<num_buoys>-uth-...m%'; the capture group holds the
# buoy count encoded at the start of the key.
pattern = re.compile('([0-9]+)-uth-.*m%')
# pattern=re.compile('([0-9]+)-uth-.*%')
buoy_counts = np.arange(4, 101, 2)
# Explicit buoy-count -> column lookup. Computing the column arithmetically as
# (num_buoys - 4) // 2 would silently map odd counts onto a neighbouring column and wrap
# counts below 4 around to the last columns through negative indexing.
buoy_column = {int(count): column for column, count in enumerate(buoy_counts)}

# Only rows whose 1-based step is a multiple of this interval are collected.
validation_interval = 50

# Single pass over the history: keep the qualifying rows instead of scanning the
# history once to count them and a second time to read them.
validation_rows = [row for row in history
                   if (row['_step'] + 1) % validation_interval == 0]
num_artifacts = len(validation_rows)
if num_artifacts == 0:
    raise ValueError('history contains no row whose step + 1 is a multiple of '
                     f'{validation_interval}; there are no scores to analyse')

# One row per collected history row, one column per entry of buoy_counts.
# Entries for which no matching key is logged stay at 0.
val_scores = np.zeros((num_artifacts, len(buoy_counts)))

for artifact_id, row in enumerate(validation_rows):
    for key in row:
        # if key.endswith('%') and not key.endswith('m%'):
        if not key.endswith('m%'):
            continue
        key_match = pattern.match(key)
        if key_match is None:
            # Ends with 'm%' but does not follow the '<count>-uth-' naming: not one of ours.
            continue
        column = buoy_column.get(int(key_match[1]))
        if column is None:
            # Buoy count outside buoy_counts: skip rather than write into a wrong column.
            continue
        val_scores[artifact_id, column] = row[key]

# Scores of the most recent collected row.
latest_val_scores = val_scores[-1]

In [None]:
# Show the buoy counts; entry i corresponds to column i of val_scores.
buoy_counts

In [None]:
# Single-panel matplotlib figure, sized through the util helpers with a height/width
# ratio of (sqrt(5) - 1) / 2 (about 0.618).
num_rows = 1
num_cols = 1
aspect_ratio = (np.sqrt(5) - 1) * 0.5
figure_size = get_fig_size(get_column_width(), ratio=aspect_ratio)
fig, ax = plt.subplots(num_rows, num_cols, figsize=figure_size)

# Scatter of 1 - score for the latest collected history row against the buoy count.
latest_plot_values = 1 - latest_val_scores
ax.scatter(buoy_counts, latest_plot_values)
ax.set_xlabel('Number of buoys')
ax.set_ylabel('Score')

fig.tight_layout(pad=0.05)  # should set tight_layout before add_axes()
# fig.savefig('buoy-count-score-correlation.pgf')

In [None]:
# Flatten the score matrix into long format: one record per (collected row, buoy count)
# pair. The episode number is 50 times the 1-based row index, and the stored score is
# 1 - the logged value.
entries = [
    {'episode': (checkpoint + 1) * 50,
     'num_buoys': buoy_counts[column],
     'score': 1 - logged_value}
    for checkpoint in range(num_artifacts)
    for column, logged_value in enumerate(val_scores[checkpoint])
]

val_df = pd.DataFrame(entries)

In [None]:
# Interactive plotly view of score against buoy count, one line per episode.
# Note that this rebinds `fig`, which previously referred to the matplotlib figure.
episode_palette = px.colors.sequential.Viridis
fig = px.line(
    val_df,
    x="num_buoys",
    y="score",
    color='episode',
    color_discrete_sequence=episode_palette,
)
fig.show()

In [None]:
from sklearn.linear_model import LinearRegression
from scipy import stats
import numpy as np


In [None]:
# Least-squares line (with intercept) through the latest scores as a function of buoy count.
# NOTE(review): the fit uses the raw logged values, whereas the plots show 1 - score;
# confirm which sign convention is intended when reading the slope.
buoy_count_feature = buoy_counts[:, None]  # 2-D (n_samples, 1) feature matrix for scikit-learn
reg = LinearRegression(fit_intercept=True)
reg.fit(buoy_count_feature, latest_val_scores)

In [None]:
# Fitted slope: change in the logged score per additional buoy.
reg.coef_

In [None]:
# Fitted intercept of the regression line.
reg.intercept_

In [None]:
# Coefficient of determination (R^2) of the fit, evaluated on the same data it was fitted to.
reg.score(buoy_counts[:, np.newaxis], latest_val_scores)

In [None]:
# NOTE(review): repeats the reg.coef_ display from an earlier cell; likely safe to delete.
reg.coef_

In [None]:
# NOTE(review): repeats the reg.intercept_ display from an earlier cell; likely safe to delete.
reg.intercept_