# NAS-Inspired Zero-Cost Proxy Evaluation with Minibatch

Testing zero-cost proxies inspired by neural architecture search (NAS) for offline, off-policy policy evaluation (OPE).

In [None]:
import sys
import pandas as pd
import pickle
sys.path.insert(0, "../")
import torch
from ppo import PPO
import glob
from tqdm.notebook import tqdm
from scipy.stats import spearmanr

In [None]:
# Behavior model: unpickled as-is from disk.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# files that come from a trusted source.
with open("../data/rule_based_log_data/<name_of_building>/action_probs_all_data.pkl", mode="rb") as behavior_file:
    behavior_model = pickle.load(behavior_file)

# Cleaned log of the rule-based controller; "<name_of_building>" is a
# placeholder that has to be replaced with the actual building folder.
log_data = pd.read_csv(
    "../data/rule_based_log_data/<name_of_building>/0_cleaned_log.csv"
)

## MiniBatch Window (length set by `num_days`)

In [None]:
# Size of the evaluation window, expressed in logged timesteps.
num_days = 15  # Number of days to consider for evaluation
# 4 timesteps per hour over 24 hours (presumably 15-minute logging; confirm).
num_ts_per_day = 24 * 4
ts_end = num_days * num_ts_per_day

# Distinct zone identifiers present in the log, in order of first appearance.
zones = log_data["zone"].unique()

## Loading all Policies

In [None]:
# Paths of all saved policy checkpoints (*.pth) in the policy library, in
# lexicographic order. The glob is non-recursive (no recursive=True), so only
# files directly inside the library folder are matched.
policy_list = glob.glob("../policy_library_20220820/**.pth")
policy_list.sort()

# Policies to skip during scoring and correlation; empty by default.
invalid_policies = list()

### 1. GradNorm

In [None]:
from zero_cost_proxies.grad_norm import GradNorm

# Score every valid policy in the library with the GradNorm proxy, per zone.
# Result layout: policy_scores[policy_path][zone] -> value of get_grad_norm().
policy_scores = {}
for zone in zones:
    print(zone)
    # Keep only this zone's rows, order them by timestep, then take the first
    # ts_end rows. The slice must be applied to the zone-filtered frame: the
    # previous code sliced log_data itself, which silently discarded the zone
    # filter and scored every zone on the same rows.
    ope_data = (
        log_data[log_data["zone"] == zone]
        .sort_values(by=["timestep"])
        .iloc[:ts_end]
    )
    for policy in tqdm(policy_list):
        # Skip invalid policies before building or loading an agent for them.
        if policy in invalid_policies:
            continue
        # Positional hyperparameters are kept exactly as before; see ppo.PPO
        # for their meaning.
        agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                    has_continuous_action_space=True, action_std_init=0.2,
                    device=torch.device('cpu'), diverse_policies=list(),
                    diverse_weight=0, diverse_increase=True)
        agent.load(policy)
        agent.policy_evaluation = True
        agent.policy_old.set_action_std(0.1)
        gn = GradNorm(agent, behavior_model)
        norm = gn.get_grad_norm(ope_data)
        # First write wins, matching the original "only if missing" checks.
        policy_scores.setdefault(policy, {}).setdefault(zone, norm)

#### Saving Raw Policy Scores

In [None]:
# Persist the raw GradNorm scores (policy -> zone -> score) in the current
# working directory.
with open("grad_norm_raw_scores_16_06_2022.pkl", mode="wb+") as scores_file:
    pickle.dump(policy_scores, scores_file)

#### Generating and Saving Spearman Correlation (OPTIONAL)

This step is done to calculate how well the ranking method works. This requires the ground truth performance of all policies in the policy library on the new building. The ground truth data can only be generated via a brute-force method.

In [None]:
# Ground-truth evaluation report; the CSV has no header row, so column names
# are supplied explicitly.
eval_data_loc = "../data/evaluation_report_20220820.csv"
df = pd.read_csv(
    eval_data_loc,
    header=None,
    names=["datetime", "policy", "zone", "energy"],
)

# Per-zone Spearman correlation between ground-truth energy and GradNorm score.
spearman_corr = {}
for zone in zones:
    zone_rows = df[df["zone"] == zone]

    # The report names policies without the leading "../" used in policy_list,
    # hence the [3:] when excluding invalid policies. (invalid_policies could
    # alternatively be derived as the entries of policy_list that have no
    # score in policy_scores.)
    excluded_names = [path[3:] for path in invalid_policies]
    zone_rows = zone_rows[~zone_rows["policy"].isin(excluded_names)]

    eval_df = zone_rows.sort_values(by=["energy"])
    # Look up each remaining policy's proxy score, in the sorted row order.
    eval_df["ope_scores"] = [
        policy_scores[f"../{policy_name}"][zone]
        for policy_name in eval_df["policy"]
    ]
    spearman_corr[zone] = spearmanr(
        eval_df["energy"].values, eval_df["ope_scores"].values
    )

### 2. Synflow

In [None]:
from zero_cost_proxies.synflow import SynFlow

# Score every valid policy in the library with the SynFlow proxy, per zone.
# Result layout: policy_scores_synflow[policy_path][zone] -> get_synflow().
policy_scores_synflow = {}
for zone in zones:
    print(zone)
    # Zone-filtered, timestep-ordered window of the log. The slice is applied
    # to the filtered frame (the previous code sliced log_data and discarded
    # the zone filter). NOTE: SynFlow is built from the agent only and
    # get_synflow() takes no data, so ope_data is not consumed in this cell;
    # it is kept so the cell leaves the same variables defined as before.
    ope_data = (
        log_data[log_data["zone"] == zone]
        .sort_values(by=["timestep"])
        .iloc[:ts_end]
    )
    for policy in tqdm(policy_list):
        # Skip invalid policies before building or loading an agent for them.
        if policy in invalid_policies:
            continue
        # Positional hyperparameters are kept exactly as before; see ppo.PPO
        # for their meaning.
        agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                    has_continuous_action_space=True, action_std_init=0.2,
                    device=torch.device('cpu'), diverse_policies=list(),
                    diverse_weight=0, diverse_increase=True)
        agent.load(policy)
        agent.policy_evaluation = True
        agent.policy_old.set_action_std(0.1)
        sf = SynFlow(agent)
        synflow = sf.get_synflow()
        # First write wins, matching the original "only if missing" checks.
        policy_scores_synflow.setdefault(policy, {}).setdefault(zone, synflow)

#### Saving Raw Policy Scores

In [None]:
# Persist the raw SynFlow scores (policy -> zone -> score). open() does not
# create directories, so the "data" folder must already exist.
with open("data/synflow_raw_scores.pkl", mode="wb+") as scores_file:
    pickle.dump(policy_scores_synflow, scores_file)

#### Generating and Saving Spearman Correlation (OPTIONAL)

This step is done to calculate how well the ranking method works. This requires the ground truth performance of all policies in the policy library on the new building. The ground truth data can only be generated via a brute-force method.

In [None]:
# Ground-truth evaluation data; the CSV has no header row, so column names
# are supplied explicitly.
eval_data_loc = "../data/1month_eval.csv"
df = pd.read_csv(
    eval_data_loc,
    header=None,
    names=["datetime", "policy", "zone", "energy"],
)

# Per-zone Spearman correlation between ground-truth energy and SynFlow score.
spearman_corr_synflow = {}
for zone in zones:
    zone_rows = df[df["zone"] == zone]

    # The evaluation file names policies without the leading "../" used in
    # policy_list, hence the [3:] when excluding invalid policies.
    # (invalid_policies could alternatively be derived as the entries of
    # policy_list that have no score in policy_scores.)
    excluded_names = [path[3:] for path in invalid_policies]
    zone_rows = zone_rows[~zone_rows["policy"].isin(excluded_names)]

    eval_df = zone_rows.sort_values(by=["energy"])
    # Look up each remaining policy's proxy score, in the sorted row order.
    eval_df["ope_scores"] = [
        policy_scores_synflow[f"../{policy_name}"][zone]
        for policy_name in eval_df["policy"]
    ]
    spearman_corr_synflow[zone] = spearmanr(
        eval_df["energy"].values, eval_df["ope_scores"].values
    )

### 3. Jacobian Covariance

In [None]:
from zero_cost_proxies.jacob_cov import JacobianCovariance

# Score every valid policy with the Jacobian-covariance proxy, per zone.
# Result layout: policy_scores_jc[policy_path][zone] -> compute_jacob_cov().
policy_scores_jc = {}

# Log columns that form one state vector, in this order. There are six of
# them; presumably this matches the 6 passed as PPO's first argument (confirm).
state_vars = ["outdoor_temp", "solar_irradiation", "time_hour",
              "zone_humidity", "zone_temp", "zone_occupancy"]

for zone in zones:
    print(zone)
    # Keep only this zone's rows, order them by timestep, then take the first
    # ts_end rows. The slice must be applied to the zone-filtered frame: the
    # previous code sliced log_data itself, so the states were built from the
    # same rows for every zone.
    ope_data = (
        log_data[log_data["zone"] == zone]
        .sort_values(by=["timestep"])
        .iloc[:ts_end]
    )
    # One row of state features per logged timestep, as a float tensor.
    # Column selection replaces the slower per-row iterrows() loop.
    states = torch.Tensor(ope_data[state_vars].values.tolist())
    for policy in tqdm(policy_list):
        # Skip invalid policies before building or loading an agent for them.
        if policy in invalid_policies:
            continue
        # Positional hyperparameters are kept exactly as before; see ppo.PPO
        # for their meaning.
        agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                    has_continuous_action_space=True, action_std_init=0.2,
                    device=torch.device('cpu'), diverse_policies=list(),
                    diverse_weight=0, diverse_increase=True)
        agent.load(policy)
        agent.policy_evaluation = True
        agent.policy_old.set_action_std(0.1)
        jc = JacobianCovariance(agent)
        # Named jc_score rather than the copy-pasted "synflow", so this cell
        # no longer overwrites the SynFlow cell's variable.
        jc_score = jc.compute_jacob_cov(states)
        # First write wins, matching the original "only if missing" checks.
        policy_scores_jc.setdefault(policy, {}).setdefault(zone, jc_score)

#### Saving Raw Policy Scores

In [None]:
# Persist the raw Jacobian-covariance scores (policy -> zone -> score).
# open() does not create directories, so the "data" folder must already exist.
with open("data/jacob_cov_raw_scores.pkl", mode="wb+") as scores_file:
    pickle.dump(policy_scores_jc, scores_file)

#### Generating and Saving Spearman Correlation (OPTIONAL)

This step is done to calculate how well the ranking method works. This requires the ground truth performance of all policies in the policy library on the new building. The ground truth data can only be generated via a brute-force method.

In [None]:
# Ground-truth evaluation data; the CSV has no header row, so column names
# are supplied explicitly.
eval_data_loc = "../data/1month_eval.csv"
df = pd.read_csv(
    eval_data_loc,
    header=None,
    names=["datetime", "policy", "zone", "energy"],
)

# Per-zone Spearman correlation between ground-truth energy and the
# Jacobian-covariance score.
spearman_corr_jc = {}
for zone in zones:
    zone_rows = df[df["zone"] == zone]

    # The evaluation file names policies without the leading "../" used in
    # policy_list, hence the [3:] when excluding invalid policies.
    # (invalid_policies could alternatively be derived as the entries of
    # policy_list that have no score in policy_scores.)
    excluded_names = [path[3:] for path in invalid_policies]
    zone_rows = zone_rows[~zone_rows["policy"].isin(excluded_names)]

    eval_df = zone_rows.sort_values(by=["energy"])
    # Look up each remaining policy's proxy score, in the sorted row order.
    eval_df["ope_scores"] = [
        policy_scores_jc[f"../{policy_name}"][zone]
        for policy_name in eval_df["policy"]
    ]
    spearman_corr_jc[zone] = spearmanr(
        eval_df["energy"].values, eval_df["ope_scores"].values
    )

### 4. SNIP

In [None]:
from zero_cost_proxies.snip import SNIP

# Score every valid policy in the library with the SNIP proxy, per zone.
# Result layout: policy_scores_snip[policy_path][zone] -> compute_snip().
policy_scores_snip = {}
for zone in zones:
    print(zone)
    # Keep only this zone's rows, order them by timestep, then take the first
    # ts_end rows. The slice must be applied to the zone-filtered frame: the
    # previous code sliced log_data itself, which silently discarded the zone
    # filter and scored every zone on the same rows.
    ope_data = (
        log_data[log_data["zone"] == zone]
        .sort_values(by=["timestep"])
        .iloc[:ts_end]
    )
    for policy in tqdm(policy_list):
        # Skip invalid policies before building or loading an agent for them.
        if policy in invalid_policies:
            continue
        # Positional hyperparameters are kept exactly as before; see ppo.PPO
        # for their meaning.
        agent = PPO(6, 1, 0.003, 0.0005, 1, 10, 0.2,
                    has_continuous_action_space=True, action_std_init=0.2,
                    device=torch.device('cpu'), diverse_policies=list(),
                    diverse_weight=0, diverse_increase=True)
        agent.load(policy)
        agent.policy_evaluation = True
        agent.policy_old.set_action_std(0.1)
        snip = SNIP(agent, behavior_model)
        sn = snip.compute_snip(ope_data)
        # First write wins, matching the original "only if missing" checks.
        policy_scores_snip.setdefault(policy, {}).setdefault(zone, sn)

#### Saving Raw Policy Scores

In [None]:
# Persist the raw SNIP scores (policy -> zone -> score); the file name embeds
# the window length in days. open() does not create directories, so the
# target folder must already exist.
with open(f"data/15zone/snip/raw_scores/snip_raw_scores_{num_days}_days_06_09_2022.pkl", mode="wb+") as scores_file:
    pickle.dump(policy_scores_snip, scores_file)

#### Generating and Saving Spearman Correlation (OPTIONAL)

This step is done to calculate how well the ranking method works. This requires the ground truth performance of all policies in the policy library on the new building. The ground truth data can only be generated via a brute-force method.

In [None]:
# Ground-truth evaluation report; the CSV has no header row, so column names
# are supplied explicitly.
eval_data_loc = "../data/evaluation_report_20220820.csv"
df = pd.read_csv(
    eval_data_loc,
    header=None,
    names=["datetime", "policy", "zone", "energy"],
)

# Per-zone Spearman correlation between ground-truth energy and SNIP score.
spearman_corr_snip = {}
for zone in zones:
    zone_rows = df[df["zone"] == zone]

    # The report names policies without the leading "../" used in policy_list,
    # hence the [3:] when excluding invalid policies. (invalid_policies could
    # alternatively be derived as the entries of policy_list that have no
    # score in policy_scores.)
    excluded_names = [path[3:] for path in invalid_policies]
    zone_rows = zone_rows[~zone_rows["policy"].isin(excluded_names)]

    eval_df = zone_rows.sort_values(by=["energy"])
    # Look up each remaining policy's proxy score, in the sorted row order.
    eval_df["ope_scores"] = [
        policy_scores_snip[f"../{policy_name}"][zone]
        for policy_name in eval_df["policy"]
    ]
    spearman_corr_snip[zone] = spearmanr(
        eval_df["energy"].values, eval_df["ope_scores"].values
    )

# Persist the per-zone correlation results; the file name embeds the window
# length in days. The target folder must already exist.
with open(f"data/15zone/snip/spearman_corr/snip_spearman_corr_{num_days}_days_06_09_2022.pkl", mode="wb+") as corr_file:
    pickle.dump(spearman_corr_snip, corr_file)