# Zero-Cost Proxy Evaluation  w/ minibatch

Testing zero-cost proxies with zone-agnostic behavior policy model

In [1]:
import numpy as np
import sys
import matplotlib.pyplot as plt
import pandas as pd
import pickle
sys.path.insert(0, "../")
import torch
from ppo import PPO
import glob
from tqdm.notebook import tqdm
import json
from scipy.stats import spearmanr

In [2]:
log_data = pd.read_csv("../data/rule_based_log_data/0_cleaned_log.csv")
with open("../data/rule_based_log_data/action_probs_all_data.pkl", "rb") as f:
    behavior_model = pickle.load(f)
with open("../data/invalid_policy_list_20220705.json") as f:
    invalid_policies = json.load(f)["invalid_policies"]

## 30 Day MiniBatch

In [3]:
num_ts_per_day = 4 * 24
num_days = 30
ts_end = num_ts_per_day * num_days
zones = log_data["zone"].unique()

## Loading all Policies

In [4]:
policy_list = sorted(list(glob.glob(f"../policy_library_20220705/**.pth")))

### 1. GradNorm

In [10]:
from zero_cost_proxies.grad_norm import GradNorm
policy_scores = {}
for zone in zones:
    print(zone)
    ope_data = log_data[log_data["zone"] == zone].sort_values(by=["timestep"])
    ope_data = log_data[:ts_end]
    for policy in tqdm(policy_list):
        agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                    has_continuous_action_space=True, action_std_init=0.2, 
                    device=torch.device('cpu'), diverse_policies=list(),
                    diverse_weight=0, diverse_increase=True)
        agent.load(policy)
        agent.policy_evaluation = True
        agent.policy_old.set_action_std(0.1)
        gn = GradNorm(agent, behavior_model)
        if policy not in invalid_policies:
            norm = gn.get_grad_norm(ope_data)
        else:
            continue
        if policy not in policy_scores:
            policy_scores[policy] = {}
        if zone not in policy_scores[policy]:
            policy_scores[policy][zone] = norm

Perimeter_top_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

  return np.log(value / (1- value))


Perimeter_bot_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Core_bottom


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

Core_top


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

Core_mid


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

In [11]:
eval_data_loc = "../data/evaluation_clean_20220705.csv"
df = pd.read_csv(eval_data_loc, header=None, names=["datetime","policy","zone","energy"])
spearman_corr = {}
for zone in zones:
    eval_df = df[df["zone"] == zone]

    # invalid_policies = list(set(policy_list) - set(policy_scores.keys()))
    for i_policy in invalid_policies:
        eval_df = eval_df[eval_df["policy"]!=i_policy[3:]]
    eval_df = eval_df.sort_values(by=["energy"])
    score_list = []
    for i, row in eval_df.iterrows():
        score_list.append(policy_scores[f"../{row['policy']}"][zone])

    eval_df["ope_scores"] = score_list
    correlation = spearmanr(eval_df["energy"].values, eval_df["ope_scores"].values)
    spearman_corr[zone] = correlation

In [12]:
spearman_corr

{'Perimeter_top_ZN_3': SpearmanrResult(correlation=-0.5902510315327281, pvalue=1.045488290059922e-25),
 'Perimeter_bot_ZN_1': SpearmanrResult(correlation=-0.6303357920699232, pvalue=4.318424827029897e-30),
 'Perimeter_top_ZN_1': SpearmanrResult(correlation=-0.6401639178876017, pvalue=2.886320991839268e-31),
 'Core_bottom': SpearmanrResult(correlation=-0.47290283920061277, pvalue=7.752887939267293e-16),
 'Perimeter_top_ZN_4': SpearmanrResult(correlation=-0.6302359432551793, pvalue=4.436563263311739e-30),
 'Core_top': SpearmanrResult(correlation=-0.47328198539925576, pvalue=7.30017895397199e-16),
 'Perimeter_bot_ZN_2': SpearmanrResult(correlation=-0.6341733381246963, pvalue=1.519065547494607e-30),
 'Perimeter_bot_ZN_3': SpearmanrResult(correlation=-0.42251584553657057, pvalue=1.227562708309841e-12),
 'Perimeter_mid_ZN_4': SpearmanrResult(correlation=-0.5520281371809501, pvalue=4.646877957952508e-22),
 'Core_mid': SpearmanrResult(correlation=-0.4698675748810896, pvalue=1.2516595330940706e

In [13]:
with open(f"data/grad_norm_raw_scores_{num_days}_days_02_08_2022_new_policies.pkl", "wb+") as f:
    pickle.dump(policy_scores, f)

In [14]:
with open(f"data/grad_norm_ipw_spearman_corrs_{num_days}_days_02_08_2022_new_policies.pkl", "wb+") as f:
    pickle.dump(spearman_corr, f)

### 2. Synflow

In [10]:
from zero_cost_proxies.synflow import SynFlow
policy_scores_synflow = {}
for zone in zones:
    print(zone)
    ope_data = log_data[log_data["zone"] == zone].sort_values(by=["timestep"])
    ope_data = log_data[:ts_end]
    for policy in tqdm(policy_list):
        agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                    has_continuous_action_space=True, action_std_init=0.2, 
                    device=torch.device('cpu'), diverse_policies=list(),
                    diverse_weight=0, diverse_increase=True)
        agent.load(policy)
        agent.policy_evaluation = True
        agent.policy_old.set_action_std(0.1)
        sf = SynFlow(agent)
        if policy not in invalid_policies:
            synflow = sf.get_synflow()
        else:
            continue
        if policy not in policy_scores_synflow:
            policy_scores_synflow[policy] = {}
        if zone not in policy_scores_synflow[policy]:
            policy_scores_synflow[policy][zone] = synflow

Perimeter_top_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Core_bottom


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

Core_top


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

Core_mid


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

In [11]:
eval_data_loc = "../data/evaluation_clean_20220705.csv"
df = pd.read_csv(eval_data_loc, header=None, names=["datetime","policy","zone","energy"])
spearman_corr_synflow = {}
for zone in zones:
    eval_df = df[df["zone"] == zone]

    # invalid_policies = list(set(policy_list) - set(policy_scores.keys()))
    for i_policy in invalid_policies:
        eval_df = eval_df[eval_df["policy"]!=i_policy[3:]]
    eval_df = eval_df.sort_values(by=["energy"])
    score_list = []
    for i, row in eval_df.iterrows():
        score_list.append(policy_scores_synflow[f"../{row['policy']}"][zone])

    eval_df["ope_scores"] = score_list
    correlation = spearmanr(eval_df["energy"].values, eval_df["ope_scores"].values)
    spearman_corr_synflow[zone] = correlation

In [12]:
spearman_corr_synflow

{'Perimeter_top_ZN_3': SpearmanrResult(correlation=0.025452925452925452, pvalue=0.6834860292897358),
 'Perimeter_bot_ZN_1': SpearmanrResult(correlation=0.10180065063785992, pvalue=0.10212218891870474),
 'Perimeter_top_ZN_1': SpearmanrResult(correlation=0.11671006554727484, pvalue=0.06071020264686637),
 'Core_bottom': SpearmanrResult(correlation=-0.07026888887354003, pvalue=0.25982953912292006),
 'Perimeter_top_ZN_4': SpearmanrResult(correlation=0.12666164759188012, pvalue=0.0416724218690959),
 'Core_top': SpearmanrResult(correlation=0.06516600935205587, pvalue=0.29612197919794137),
 'Perimeter_bot_ZN_2': SpearmanrResult(correlation=0.05487667580690836, pvalue=0.37910440690310465),
 'Perimeter_bot_ZN_3': SpearmanrResult(correlation=0.14691016086364922, pvalue=0.017995575085830086),
 'Perimeter_mid_ZN_4': SpearmanrResult(correlation=-0.060182620647736926, pvalue=0.33467630412167293),
 'Core_mid': SpearmanrResult(correlation=0.08101339264129961, pvalue=0.19373195294845727),
 'Perimeter_mi

In [13]:
with open(f"data/synflow_raw_scores_{num_days}_days_19_07_2022.pkl", "wb+") as f:
    pickle.dump(policy_scores_synflow, f)

In [14]:
with open(f"data/synflow_spearman_corr_{num_days}_days_19_07_2022.pkl", "wb+") as f:
    pickle.dump(spearman_corr_synflow, f)

### 3. Jacobian Covariance

In [15]:
from zero_cost_proxies.jacob_cov import JacobianCovariance
policy_scores_jc = {}
for zone in zones:
    print(zone)
    ope_data = log_data[log_data["zone"] == zone].sort_values(by=["timestep"])
    ope_data = log_data[:ts_end]
    states = []
    for i, row in ope_data.iterrows():
        state_vars = ["outdoor_temp", "solar_irradiation", "time_hour",
                      "zone_humidity", "zone_temp", "zone_occupancy"]
        state = [row[var] for var in state_vars]
        states.append(state)
    states = torch.Tensor(states)
    for policy in tqdm(policy_list):
        agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                    has_continuous_action_space=True, action_std_init=0.2, 
                    device=torch.device('cpu'), diverse_policies=list(),
                    diverse_weight=0, diverse_increase=True)
        agent.load(policy)
        agent.policy_evaluation = True
        agent.policy_old.set_action_std(0.1)
        jc = JacobianCovariance(agent)
        if policy not in invalid_policies:
            synflow = jc.compute_jacob_cov(states)
        else:
            continue
        if policy not in policy_scores_jc:
            policy_scores_jc[policy] = {}
        if zone not in policy_scores_jc[policy]:
            policy_scores_jc[policy][zone] = synflow

Perimeter_top_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Core_bottom


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

Core_top


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

Core_mid


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

In [16]:
eval_data_loc = "../data/evaluation_clean_20220705.csv"
df = pd.read_csv(eval_data_loc, header=None, names=["datetime","policy","zone","energy"])
spearman_corr_jc = {}
for zone in zones:
    eval_df = df[df["zone"] == zone]

    # invalid_policies = list(set(policy_list) - set(policy_scores.keys()))
    for i_policy in invalid_policies:
        eval_df = eval_df[eval_df["policy"]!=i_policy[3:]]
    eval_df = eval_df.sort_values(by=["energy"])
    score_list = []
    for i, row in eval_df.iterrows():
        score_list.append(policy_scores_jc[f"../{row['policy']}"][zone])

    eval_df["ope_scores"] = score_list
    correlation = spearmanr(eval_df["energy"].values, eval_df["ope_scores"].values)
    spearman_corr_jc[zone] = correlation

In [17]:
spearman_corr_jc

{'Perimeter_top_ZN_3': SpearmanrResult(correlation=-0.009908758745968046, pvalue=0.8739060762668507),
 'Perimeter_bot_ZN_1': SpearmanrResult(correlation=-0.018688225664969853, pvalue=0.7646883410616825),
 'Perimeter_top_ZN_1': SpearmanrResult(correlation=-0.031863987677941165, pvalue=0.6097351375982247),
 'Core_bottom': SpearmanrResult(correlation=-0.006721185790953233, pvalue=0.914277094600226),
 'Perimeter_top_ZN_4': SpearmanrResult(correlation=-0.0659692915506869, pvalue=0.29019564206582454),
 'Core_top': SpearmanrResult(correlation=-0.014157935088167645, pvalue=0.8206106376673565),
 'Perimeter_bot_ZN_2': SpearmanrResult(correlation=-0.023027883492999772, pvalue=0.7122357735978578),
 'Perimeter_bot_ZN_3': SpearmanrResult(correlation=0.005040025970258528, pvalue=0.9356648561061454),
 'Perimeter_mid_ZN_4': SpearmanrResult(correlation=-0.04520689869527078, pvalue=0.46882785801007754),
 'Core_mid': SpearmanrResult(correlation=-0.022494664355129472, pvalue=0.7186139150742867),
 'Perimete

In [18]:
with open(f"data/jacob_cov_raw_scores_{num_days}_days_19_07_2022.pkl", "wb+") as f:
    pickle.dump(policy_scores_jc, f)

In [19]:
with open(f"data/jacob_cov_spearman_corr_{num_days}_days_19_07_2022_new_policies.pkl", "wb+") as f:
    pickle.dump(spearman_corr_jc, f)

## 4. SNIP

In [5]:
from zero_cost_proxies.snip import SNIP
policy_scores_snip = {}
for zone in zones:
    print(zone)
    ope_data = log_data[log_data["zone"] == zone].sort_values(by=["timestep"])
    ope_data = log_data[:ts_end]
    for policy in tqdm(policy_list):
        agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                    has_continuous_action_space=True, action_std_init=0.2, 
                    device=torch.device('cpu'), diverse_policies=list(),
                    diverse_weight=0, diverse_increase=True)
        agent.load(policy)
        agent.policy_evaluation = True
        agent.policy_old.set_action_std(0.1)
        snip = SNIP(agent, behavior_model)
        if policy not in invalid_policies:
            sn = snip.compute_snip(ope_data)
        else:
            continue
        if policy not in policy_scores_snip:
            policy_scores_snip[policy] = {}
        if zone not in policy_scores_snip[policy]:
            policy_scores_snip[policy][zone] = sn

Perimeter_top_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

  return np.log(value / (1- value))


Perimeter_bot_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Core_bottom


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

Core_top


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

Core_mid


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_3


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_top_ZN_2


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_mid_ZN_1


  0%|          | 0/400 [00:00<?, ?it/s]

Perimeter_bot_ZN_4


  0%|          | 0/400 [00:00<?, ?it/s]

In [6]:
eval_data_loc = "../data/evaluation_clean_20220705.csv"
df = pd.read_csv(eval_data_loc, header=None, names=["datetime","policy","zone","energy"])
spearman_corr_snip = {}
for zone in zones:
    eval_df = df[df["zone"] == zone]

    # invalid_policies = list(set(policy_list) - set(policy_scores.keys()))
    for i_policy in invalid_policies:
        eval_df = eval_df[eval_df["policy"]!=i_policy[3:]]
    eval_df = eval_df.sort_values(by=["energy"])
    score_list = []
    for i, row in eval_df.iterrows():
        score_list.append(policy_scores_snip[f"../{row['policy']}"][zone])

    eval_df["ope_scores"] = score_list
    correlation = spearmanr(eval_df["energy"].values, eval_df["ope_scores"].values)
    spearman_corr_snip[zone] = correlation

In [7]:
spearman_corr_snip

{'Perimeter_top_ZN_3': SpearmanrResult(correlation=-0.6071296881603231, pvalue=1.7742316549099118e-27),
 'Perimeter_bot_ZN_1': SpearmanrResult(correlation=-0.6471213612162425, pvalue=4.002833947868811e-32),
 'Perimeter_top_ZN_1': SpearmanrResult(correlation=-0.6567822398149569, pvalue=2.3619323297879807e-33),
 'Core_bottom': SpearmanrResult(correlation=-0.48507796344827286, pvalue=1.0814563440486364e-16),
 'Perimeter_top_ZN_4': SpearmanrResult(correlation=-0.6415653239138973, pvalue=1.9467364894573024e-31),
 'Core_top': SpearmanrResult(correlation=-0.48430639760622374, pvalue=1.2281071919597128e-16),
 'Perimeter_bot_ZN_2': SpearmanrResult(correlation=-0.6468060044939302, pvalue=4.3826851777066006e-32),
 'Perimeter_bot_ZN_3': SpearmanrResult(correlation=-0.43617963128132564, pvalue=1.8747026724090264e-13),
 'Perimeter_mid_ZN_4': SpearmanrResult(correlation=-0.563177842403419, pvalue=4.483259814368097e-23),
 'Core_mid': SpearmanrResult(correlation=-0.48145361616774823, pvalue=1.959877436

In [8]:
with open(f"data/snip_spearman_corr_{num_days}_days_02_08_2022.pkl", "wb+") as f:
    pickle.dump(spearman_corr_snip, f)

In [9]:
with open(f"data/snip_raw_scores_{num_days}_days_02_08_2022.pkl", "wb+") as f:
    pickle.dump(policy_scores_snip, f)