# Analysis of UCB-Based Policy Selection

1. Compare the number of evaluations vs Spearman Rank Correlation to validation data
2. UCB evaluates each policy for 1-day and takes aggregate energy consumption as reward
3. Tested on the 15-zone test building

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import spearmanr
import json
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

## Valdiation Data

In [3]:
eval_data_loc = "../data/1month_eval.csv"
invalid_policies_loc = "../data/invalid_policy_list.json"

eval_df = pd.read_csv(eval_data_loc, header=None, names=["datetime", "policy", "zone", "energy"])
with open(invalid_policies_loc) as f:
    invalid_policies = json.load(f)["invalid_policies"]

## UCB Zone Data

In [6]:
core_top_loc = "../data/ucb_log_data/Core_top/15/ucb_log_data.csv"
core_top_df = pd.read_csv(core_top_loc)
core_top_df

Unnamed: 0,datetime,flops,policy_name,start_year,start_month,start_day,policy_library/100_0.pth_score,policy_library/100_0.pth_count,policy_library/100_1.pth_score,policy_library/100_1.pth_count,...,policy_library/119_2_1e1.pth_score,policy_library/119_2_1e1.pth_count,policy_library/119_3.pth_score,policy_library/119_3.pth_count,policy_library/119_3_1e0.pth_score,policy_library/119_3_1e0.pth_count,policy_library/119_4.pth_score,policy_library/119_4.pth_count,policy_library/119_4_1e0.pth_score,policy_library/119_4_1e0.pth_count
0,2022-06-29 17:00:36.084208,0,policy_library/100_0.pth,1999,6,14,-33.035254,1.0,inf,0.0,...,inf,0.0,inf,0.0,inf,0.0,inf,0.0,inf,0.0
1,2022-06-29 17:00:39.711742,0,policy_library/100_1.pth,1999,5,28,-33.035254,1.0,-9.130713,1.0,...,inf,0.0,inf,0.0,inf,0.0,inf,0.0,inf,0.0
2,2022-06-29 17:00:43.356522,0,policy_library/100_1_1e-1.pth,1998,6,23,-33.035254,1.0,-9.130713,1.0,...,inf,0.0,inf,0.0,inf,0.0,inf,0.0,inf,0.0
3,2022-06-29 17:00:46.928150,0,policy_library/100_1_1e0.pth,1997,3,31,-33.035254,1.0,-9.130713,1.0,...,inf,0.0,inf,0.0,inf,0.0,inf,0.0,inf,0.0
4,2022-06-29 17:00:50.513171,0,policy_library/100_1_1e1.pth,2005,3,7,-33.035254,1.0,-9.130713,1.0,...,inf,0.0,inf,0.0,inf,0.0,inf,0.0,inf,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5165,2022-06-29 22:29:28.597447,0,policy_library/103_1_1e0.pth,1999,3,9,-33.035254,1.0,-9.904467,84.0,...,-10.556331,20.0,-19.396646,1.0,-14.141299,2.0,-12.381252,5.0,-22.295125,1.0
5166,2022-06-29 22:29:33.197100,0,policy_library/103_1_1e0.pth,1997,11,10,-33.035254,1.0,-9.904467,84.0,...,-10.556331,20.0,-19.396646,1.0,-14.141299,2.0,-12.381252,5.0,-22.295125,1.0
5167,2022-06-29 22:29:37.291178,0,policy_library/103_1_1e0.pth,1999,10,5,-33.035254,1.0,-9.904467,84.0,...,-10.556331,20.0,-19.396646,1.0,-14.141299,2.0,-12.381252,5.0,-22.295125,1.0
5168,2022-06-29 22:29:41.895100,0,policy_library/103_1_1e0.pth,1999,8,23,-33.035254,1.0,-9.904467,84.0,...,-10.556331,20.0,-19.396646,1.0,-14.141299,2.0,-12.381252,5.0,-22.295125,1.0


In [17]:
core_top_cols = core_top_df.columns
policy_names = [col[15:-10] for col in core_top_cols if "score" in col]
num_policies = len(policy_names)

## UCB Q-Value Plot

In [None]:
core_top_q_vals_df = pd.DataFrame()
policies_for_plot = policy_names[:3]
for policy in policies_for_plot:
    df_col_name = f"policy_library/{policy}.pth_score"
    core_top_q_vals_df["timestep"] = core_top_df[len(policies_for_plot):].index
    core_top_q_vals_df[f"{policy}_value"] = core_top_df[len(policies_for_plot):][df_col_name]

3
RangeIndex(start=3, stop=5170, step=1)
0          3
1          4
2          5
3          6
4          7
        ... 
5162    5165
5163    5166
5164    5167
5165    5168
5166    5169
Name: timestep, Length: 5167, dtype: int64


In [21]:
core_top_q_vals_df

Unnamed: 0,timestep,100_0_value,100_1_value,100_1_1e-1_value
0,3,,,
1,4,,,
2,5,,,
3,6,-33.035254,-9.130713,-20.797032
4,7,-33.035254,-9.130713,-20.797032
...,...,...,...,...
5162,5165,-33.035254,-9.904467,-20.797032
5163,5166,-33.035254,-9.904467,-20.797032
5164,5167,-33.035254,-9.904467,-20.797032
5165,5168,-33.035254,-9.904467,-20.797032


In [None]:
plt.figure(figsize=(10, 7))
plt.title("UCB Q-Value")