In [1]:
import numpy as np
import sys
import matplotlib.pyplot as plt
import pandas as pd
import pickle
sys.path.insert(0, "../")
import torch
from ppo import PPO
import glob
from tqdm.notebook import tqdm
import json
from scipy.stats import spearmanr
import importlib
from obp.ope import (
    ContinuousOffPolicyEvaluation,
    KernelizedInverseProbabilityWeighting,
    KernelizedSelfNormalizedInverseProbabilityWeighting,\
)
import wandb

%matplotlib widget

In [2]:
log_data = pd.read_csv("../data/rule_based_log_data/0_cleaned_log.csv")
with open("../data/rule_based_log_data/action_probs_all_data.pkl", "rb") as f:
    behavior_model = pickle.load(f)
with open("../data/invalid_policy_list_20220705.json") as f:
    invalid_policies = json.load(f)["invalid_policies"]

In [3]:
num_ts_per_day = 4 * 24
num_days = 30
ts_end = num_ts_per_day * num_days
zones = log_data["zone"].unique()

In [4]:
policy_list = sorted(list(glob.glob(f"../policy_library_20220705/**.pth")))

In [14]:
def get_policy_scores(config, use_progress_bar=False):
    policy_scores = {}
    kernel = config["kernel"]
    bandwidth = config["bandwidth"]
    for zone in zones:
        # print(zone)
        ope_data = log_data[log_data["zone"] == zone].sort_values(by=["timestep"])
        ope_data = log_data[:ts_end]
        states = []
        actions = []
        rewards = []
        for i, row in ope_data.iterrows():
            state_vars = ["outdoor_temp", "solar_irradiation", "time_hour",
                          "zone_humidity", "zone_temp", "zone_occupancy"]
            state = [row[var] for var in state_vars]
            action = row["action"]
            reward = row["reward"]
            states.append(state)
            rewards.append(reward)
            actions.append(action)
        ope = ContinuousOffPolicyEvaluation(bandit_feedback=
                                            {"action": np.array(actions),
                                             "reward": np.array(rewards),
                                             "pscore": np.ones((len(ope_data)))},
                                            ope_estimators=[KernelizedSelfNormalizedInverseProbabilityWeighting(kernel=kernel, bandwidth=bandwidth)])
        
        if use_progress_bar:
            policy_iterable = tqdm(policy_list)
        else:
            policy_iterable = policy_list
        for policy in policy_iterable:
            agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                        has_continuous_action_space=True, action_std_init=0.2, 
                        device=torch.device('cpu'), diverse_policies=list(),
                        diverse_weight=0, diverse_increase=True)
            agent.load(policy)
            agent.policy_evaluation = False
            agent.policy_old.set_action_std(0.1)

            # probs = torch.exp(agent.buffer.logprobs[0].reshape(-1, 1))
            if policy not in invalid_policies:
                # score, _, _, _, _ = ipw.evaluate_policy(agent.select_action, behavior_model, score="mean")
                eval_actions = torch.Tensor(agent.select_action(states)).sigmoid()
                estimated_value = ope.estimate_policy_values(action_by_evaluation_policy=eval_actions.numpy())
                # print(estimated_value)
            else:
                continue
            if policy not in policy_scores:
                policy_scores[policy] = {}
            if zone not in policy_scores[policy]:
                policy_scores[policy][zone] = estimated_value["kernelized_snipw"]
    return policy_scores

In [15]:
def calculate_zonewise_spearman_corr(policy_scores):
    eval_data_loc = "../data/evaluation_clean_20220705.csv"
    df = pd.read_csv(eval_data_loc, header=None, names=["datetime","policy","zone","energy"])
    spearman_corr = {}
    for zone in zones:
        eval_df = df[df["zone"] == zone]

        # invalid_policies = list(set(policy_list) - set(policy_scores.keys()))
        for i_policy in invalid_policies:
            eval_df = eval_df[eval_df["policy"]!=i_policy[3:]]
        eval_df = eval_df.sort_values(by=["energy"])
        score_list = []
        for i, row in eval_df.iterrows():
            score_list.append(policy_scores[f"../{row['policy']}"][zone])

        eval_df["ope_scores"] = score_list
        correlation = spearmanr(eval_df["energy"].values, eval_df["ope_scores"].values, nan_policy="omit")
        spearman_corr[zone] = correlation
    return spearman_corr

In [16]:
def calculate_average_spearman_corr(config):
    policy_scores = get_policy_scores(config)
    spearman_corr = calculate_zonewise_spearman_corr(policy_scores)
    corrs = []
    for zone in spearman_corr:
        corrs.append(abs(spearman_corr[zone].correlation))
    return np.mean(corrs)

In [16]:
test_config = {
    "kernel": "gaussian",
    "bandwidth": 0.02
}
calculate_average_spearman_corr(test_config)

0.5780736422596887

## Wandb Hyperparameter Sweep

In [11]:
def evaluate_hyperparams(config=None):
    with wandb.init(config=config):
        config = wandb.config
        avg_spearman_corr = calculate_average_spearman_corr(config)
        wandb.log({"average_spearman_correlation": avg_spearman_corr})   

In [12]:
project = "ContinuousOPEHyperParamTuningSNIPW_28_07_2022"

wandb.init(project=project)

sweep_config = {
    "method": "random",
    "metric": {
        "name": "average_spearman_correlation",
        "goal": "maximize"
    },
    "parameters": {
        "kernel": {
            "values": ["gaussian", "epanechnikov", "triangular", "cosine"]
        },
        "bandwidth": {
            "distribution": "uniform",
            "min": 0,
            "max": 0.5
        }
    }
}

sweep_id = wandb.sweep(sweep_config, project=project)

[34m[1mwandb[0m: Currently logged in as: [33maakashsasikumar[0m. Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: ci54h96s
Sweep URL: https://wandb.ai/aakashsasikumar/ContinuousOPEHyperParamTuningSNIPW_28_07_2022/sweeps/ci54h96s


In [17]:
wandb.agent(sweep_id, evaluate_hyperparams, count=100)

[34m[1mwandb[0m: Agent Starting Run: tr3w06az with config:
[34m[1mwandb[0m: 	bandwidth: 0.012859514079691492
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.2511


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: xl3szkss with config:
[34m[1mwandb[0m: 	bandwidth: 0.2965264851904547
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.25861


[34m[1mwandb[0m: Agent Starting Run: kah2fvwd with config:
[34m[1mwandb[0m: 	bandwidth: 0.10538175893178968
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.28731


[34m[1mwandb[0m: Agent Starting Run: e3cn9mgp with config:
[34m[1mwandb[0m: 	bandwidth: 0.11422557544384854
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.20309


[34m[1mwandb[0m: Agent Starting Run: 1ujwl9rd with config:
[34m[1mwandb[0m: 	bandwidth: 0.3425552391178202
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.60641


[34m[1mwandb[0m: Agent Starting Run: gicv09to with config:
[34m[1mwandb[0m: 	bandwidth: 0.37741596229802427
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.23609


[34m[1mwandb[0m: Agent Starting Run: nvr1h7ja with config:
[34m[1mwandb[0m: 	bandwidth: 0.3749606763830324
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.22112


[34m[1mwandb[0m: Agent Starting Run: e3g6iwu7 with config:
[34m[1mwandb[0m: 	bandwidth: 0.232301566274969
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.39743


[34m[1mwandb[0m: Agent Starting Run: ja213ki1 with config:
[34m[1mwandb[0m: 	bandwidth: 0.1712776176459354
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.2712


[34m[1mwandb[0m: Agent Starting Run: bzgbvypp with config:
[34m[1mwandb[0m: 	bandwidth: 0.3683269993343709
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.31293


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pziy4wms with config:
[34m[1mwandb[0m: 	bandwidth: 0.1409152427019023
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.27204


[34m[1mwandb[0m: Agent Starting Run: vads4jg4 with config:
[34m[1mwandb[0m: 	bandwidth: 0.31154553004936064
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.35188


[34m[1mwandb[0m: Agent Starting Run: zngm3b9z with config:
[34m[1mwandb[0m: 	bandwidth: 0.2832042951180449
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.36619


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kvztr1c9 with config:
[34m[1mwandb[0m: 	bandwidth: 0.4809933862260597
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.17398


[34m[1mwandb[0m: Agent Starting Run: p7sufig4 with config:
[34m[1mwandb[0m: 	bandwidth: 0.34840394149136245
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.61197


[34m[1mwandb[0m: Agent Starting Run: b28am3xh with config:
[34m[1mwandb[0m: 	bandwidth: 0.15244611304016825
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.28098


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: nx7uvli4 with config:
[34m[1mwandb[0m: 	bandwidth: 0.27145288323205075
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.36597


[34m[1mwandb[0m: Agent Starting Run: xs1l2xtu with config:
[34m[1mwandb[0m: 	bandwidth: 0.03785206266477242
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.35229


[34m[1mwandb[0m: Agent Starting Run: e064beyo with config:
[34m[1mwandb[0m: 	bandwidth: 0.03037242980132032
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.21597


[34m[1mwandb[0m: Agent Starting Run: 1mgkpqm8 with config:
[34m[1mwandb[0m: 	bandwidth: 0.07037301226839593
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.22075


[34m[1mwandb[0m: Agent Starting Run: gfg6mj4t with config:
[34m[1mwandb[0m: 	bandwidth: 0.3538245389308409
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.31707


[34m[1mwandb[0m: Agent Starting Run: upb01iux with config:
[34m[1mwandb[0m: 	bandwidth: 0.34876494138099895
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.24216


[34m[1mwandb[0m: Agent Starting Run: ux6e441v with config:
[34m[1mwandb[0m: 	bandwidth: 0.3653482741290579
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.62256


[34m[1mwandb[0m: Agent Starting Run: ybof9q46 with config:
[34m[1mwandb[0m: 	bandwidth: 0.46225824846060254
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.24377


[34m[1mwandb[0m: Agent Starting Run: rhtirqhl with config:
[34m[1mwandb[0m: 	bandwidth: 0.360378086569041
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.23305


[34m[1mwandb[0m: Agent Starting Run: w57saypt with config:
[34m[1mwandb[0m: 	bandwidth: 0.1171614838219735
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.41354


[34m[1mwandb[0m: Agent Starting Run: 2wm37s69 with config:
[34m[1mwandb[0m: 	bandwidth: 0.04622984831869886
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.33984


[34m[1mwandb[0m: Agent Starting Run: srcxiidh with config:
[34m[1mwandb[0m: 	bandwidth: 0.21923469965007952
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.26943


[34m[1mwandb[0m: Agent Starting Run: y3biyrk7 with config:
[34m[1mwandb[0m: 	bandwidth: 0.12641310676203615
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.26755


[34m[1mwandb[0m: Agent Starting Run: q54ogcil with config:
[34m[1mwandb[0m: 	bandwidth: 0.04499155876652422
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.27862


[34m[1mwandb[0m: Agent Starting Run: u1lhlk62 with config:
[34m[1mwandb[0m: 	bandwidth: 0.4832140350106764
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.64996


[34m[1mwandb[0m: Agent Starting Run: j1fj4inr with config:
[34m[1mwandb[0m: 	bandwidth: 0.3237225212797457
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.58745


[34m[1mwandb[0m: Agent Starting Run: 6q5apdyy with config:
[34m[1mwandb[0m: 	bandwidth: 0.3606641700183443
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.22465


[34m[1mwandb[0m: Agent Starting Run: 95mwb3ud with config:
[34m[1mwandb[0m: 	bandwidth: 0.16740458287379711
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.28185


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: in9qxr9d with config:
[34m[1mwandb[0m: 	bandwidth: 0.1931577292462589
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.38289


[34m[1mwandb[0m: Agent Starting Run: k4ic5bqu with config:
[34m[1mwandb[0m: 	bandwidth: 0.3965536470211747
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.63654


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 7mh908fl with config:
[34m[1mwandb[0m: 	bandwidth: 0.20015951108917412
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.27781


[34m[1mwandb[0m: Agent Starting Run: e0rqkvcu with config:
[34m[1mwandb[0m: 	bandwidth: 0.3708726057231202
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.31803


[34m[1mwandb[0m: Agent Starting Run: al8wcmqh with config:
[34m[1mwandb[0m: 	bandwidth: 0.06798062734354438
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.34528


[34m[1mwandb[0m: Agent Starting Run: dr2an8xc with config:
[34m[1mwandb[0m: 	bandwidth: 0.06456989107439243
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.21782


[34m[1mwandb[0m: Agent Starting Run: djcvl5t3 with config:
[34m[1mwandb[0m: 	bandwidth: 0.36222148813079896
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.31475


[34m[1mwandb[0m: Agent Starting Run: 73b14ds0 with config:
[34m[1mwandb[0m: 	bandwidth: 0.0745964113899909
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.31194


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: edbwbins with config:
[34m[1mwandb[0m: 	bandwidth: 0.11434791376947796
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.20042


[34m[1mwandb[0m: Agent Starting Run: me9xkxtk with config:
[34m[1mwandb[0m: 	bandwidth: 0.3553185275980034
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.23777


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: adlawk36 with config:
[34m[1mwandb[0m: 	bandwidth: 0.10783456245454852
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.27226


[34m[1mwandb[0m: Agent Starting Run: qzt3jpyk with config:
[34m[1mwandb[0m: 	bandwidth: 0.3536365355861065
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.23095


[34m[1mwandb[0m: Agent Starting Run: woaz5k4c with config:
[34m[1mwandb[0m: 	bandwidth: 0.27285042831773554
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.52263


[34m[1mwandb[0m: Agent Starting Run: gqbi23io with config:
[34m[1mwandb[0m: 	bandwidth: 0.12131483445651114
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.41033


[34m[1mwandb[0m: Agent Starting Run: r3c3he0o with config:
[34m[1mwandb[0m: 	bandwidth: 0.22236751112527747
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.27123


[34m[1mwandb[0m: Agent Starting Run: 5u6iwf72 with config:
[34m[1mwandb[0m: 	bandwidth: 0.04446213871698035
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.34255


[34m[1mwandb[0m: Agent Starting Run: i2x708w9 with config:
[34m[1mwandb[0m: 	bandwidth: 0.16509925342699144
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.1599


[34m[1mwandb[0m: Agent Starting Run: 03fq9qoz with config:
[34m[1mwandb[0m: 	bandwidth: 0.10696086973546104
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.20564


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kr7voohr with config:
[34m[1mwandb[0m: 	bandwidth: 0.3209529320016347
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.34883


[34m[1mwandb[0m: Agent Starting Run: a4lsmcyv with config:
[34m[1mwandb[0m: 	bandwidth: 0.08916191365441323
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.28893


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cg5solsf with config:
[34m[1mwandb[0m: 	bandwidth: 0.24320059296582913
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.27597


[34m[1mwandb[0m: Agent Starting Run: 63n2sae5 with config:
[34m[1mwandb[0m: 	bandwidth: 0.16316932585662292
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.39961


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pgu8ba3f with config:
[34m[1mwandb[0m: 	bandwidth: 0.1231972884464358
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.27302


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: swjrpa0f with config:
[34m[1mwandb[0m: 	bandwidth: 0.18095165812691488
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.18583


[34m[1mwandb[0m: Agent Starting Run: y6ia8wg2 with config:
[34m[1mwandb[0m: 	bandwidth: 0.001944359142891372
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.14399


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gd3c5o5z with config:
[34m[1mwandb[0m: 	bandwidth: 0.2107070088984576
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.37756


[34m[1mwandb[0m: Agent Starting Run: 6se54077 with config:
[34m[1mwandb[0m: 	bandwidth: 0.10943957201986082
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.20395


[34m[1mwandb[0m: Agent Starting Run: 91v9trfw with config:
[34m[1mwandb[0m: 	bandwidth: 0.1494199278490458
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.28251


[34m[1mwandb[0m: Agent Starting Run: voyknpvh with config:
[34m[1mwandb[0m: 	bandwidth: 0.031647350471198155
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.26211


[34m[1mwandb[0m: Agent Starting Run: 6v8jinxb with config:
[34m[1mwandb[0m: 	bandwidth: 0.3857683438813712
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.3009


[34m[1mwandb[0m: Agent Starting Run: 8tgjs7ev with config:
[34m[1mwandb[0m: 	bandwidth: 0.045059178214086726
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.28974


[34m[1mwandb[0m: Agent Starting Run: ko2noo55 with config:
[34m[1mwandb[0m: 	bandwidth: 0.4649578792084173
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.20586


[34m[1mwandb[0m: Agent Starting Run: m1f8absj with config:
[34m[1mwandb[0m: 	bandwidth: 0.199721018243212
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.37967


[34m[1mwandb[0m: Agent Starting Run: m321qid7 with config:
[34m[1mwandb[0m: 	bandwidth: 0.3736289705891255
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.31302


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wgat28zk with config:
[34m[1mwandb[0m: 	bandwidth: 0.13530292225295248
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.38942


[34m[1mwandb[0m: Agent Starting Run: kerx8eqa with config:
[34m[1mwandb[0m: 	bandwidth: 0.09881911838014255
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.20926


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 971g0u2q with config:
[34m[1mwandb[0m: 	bandwidth: 0.4980154649404472
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.65098


[34m[1mwandb[0m: Agent Starting Run: pm6daipa with config:
[34m[1mwandb[0m: 	bandwidth: 0.253427320779735
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.47531


[34m[1mwandb[0m: Agent Starting Run: ukl206vz with config:
[34m[1mwandb[0m: 	bandwidth: 0.41369333324548374
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.22425


[34m[1mwandb[0m: Agent Starting Run: l2af9k5m with config:
[34m[1mwandb[0m: 	bandwidth: 0.39359793678961946
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.63587


[34m[1mwandb[0m: Agent Starting Run: m44cqwse with config:
[34m[1mwandb[0m: 	bandwidth: 0.432936696479247
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.21685


[34m[1mwandb[0m: Agent Starting Run: jgwxv3pk with config:
[34m[1mwandb[0m: 	bandwidth: 0.06016206720309086
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.34003


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tzwvhi6v with config:
[34m[1mwandb[0m: 	bandwidth: 0.24959188015702805
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.46326


[34m[1mwandb[0m: Agent Starting Run: 827vkz3c with config:
[34m[1mwandb[0m: 	bandwidth: 0.22118213463253783
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.27583


[34m[1mwandb[0m: Agent Starting Run: 9gtgzu4r with config:
[34m[1mwandb[0m: 	bandwidth: 0.4764541867616039
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.64905


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: l1905vqx with config:
[34m[1mwandb[0m: 	bandwidth: 0.25655470888476123
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.25684


[34m[1mwandb[0m: Agent Starting Run: zsv5zwa7 with config:
[34m[1mwandb[0m: 	bandwidth: 0.040267779715958296
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.33471


[34m[1mwandb[0m: Agent Starting Run: 4fksd3va with config:
[34m[1mwandb[0m: 	bandwidth: 0.4487526610470501
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.25105


[34m[1mwandb[0m: Agent Starting Run: 8wuxarlj with config:
[34m[1mwandb[0m: 	bandwidth: 0.3253037847332823
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.25082


[34m[1mwandb[0m: Agent Starting Run: tntlqitw with config:
[34m[1mwandb[0m: 	bandwidth: 0.3159302307270631
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.34827


[34m[1mwandb[0m: Agent Starting Run: ztg6u3vj with config:
[34m[1mwandb[0m: 	bandwidth: 0.2520182344271046
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.38377


[34m[1mwandb[0m: Agent Starting Run: 3axpa9bo with config:
[34m[1mwandb[0m: 	bandwidth: 0.25366076235449064
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.47618


[34m[1mwandb[0m: Agent Starting Run: 53zgcpeo with config:
[34m[1mwandb[0m: 	bandwidth: 0.1426171836454096
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.26922


[34m[1mwandb[0m: Agent Starting Run: r2ls3qw5 with config:
[34m[1mwandb[0m: 	bandwidth: 0.42409811649582047
[34m[1mwandb[0m: 	kernel: epanechnikov


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.20521


[34m[1mwandb[0m: Agent Starting Run: owfzzfh2 with config:
[34m[1mwandb[0m: 	bandwidth: 0.2282194044588487
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.38045


[34m[1mwandb[0m: Agent Starting Run: u3n5txvc with config:
[34m[1mwandb[0m: 	bandwidth: 0.3096635892104467
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.5749


[34m[1mwandb[0m: Agent Starting Run: ropphbo1 with config:
[34m[1mwandb[0m: 	bandwidth: 0.31944127759503
[34m[1mwandb[0m: 	kernel: gaussian


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.58465


[34m[1mwandb[0m: Agent Starting Run: 05i1lctk with config:
[34m[1mwandb[0m: 	bandwidth: 0.3146650148410831
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.34519


[34m[1mwandb[0m: Agent Starting Run: rphjjqts with config:
[34m[1mwandb[0m: 	bandwidth: 0.08505074619447961
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.30498


[34m[1mwandb[0m: Agent Starting Run: 1ce73241 with config:
[34m[1mwandb[0m: 	bandwidth: 0.34070182505191793
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.32485


[34m[1mwandb[0m: Agent Starting Run: nyumev7m with config:
[34m[1mwandb[0m: 	bandwidth: 0.0029950803811935045
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.25419


[34m[1mwandb[0m: Agent Starting Run: 8t1p6fzy with config:
[34m[1mwandb[0m: 	bandwidth: 0.46187434262776433
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.20601


[34m[1mwandb[0m: Agent Starting Run: 0ogumqxn with config:
[34m[1mwandb[0m: 	bandwidth: 0.22549702520101633
[34m[1mwandb[0m: 	kernel: cosine


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.38817


[34m[1mwandb[0m: Agent Starting Run: ozrgo0lv with config:
[34m[1mwandb[0m: 	bandwidth: 0.2604600955653822
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.26762


[34m[1mwandb[0m: Agent Starting Run: 2gc52jf8 with config:
[34m[1mwandb[0m: 	bandwidth: 0.15805141676165663
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.2869


[34m[1mwandb[0m: Agent Starting Run: 1mkvbecj with config:
[34m[1mwandb[0m: 	bandwidth: 0.04331731296962388
[34m[1mwandb[0m: 	kernel: triangular


  estimated_rewards /= (kernel_func(u) / pscore).mean()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
average_spearman_correlation,▁

0,1
average_spearman_correlation,0.33529


wandb: Network error (ReadTimeout), entering retry loop.
