# Adaptation Evaluation
Comparison between:
- MAML
- pretrained model

In [None]:
import torch as th
import vectorbt as vbt
import gymnasium as gym
import torch as th
from torchmeta.utils.gradient_based import gradient_update_parameters
from collections import OrderedDict
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
sns.set()
import plotly.graph_objects as go

from stock_env.envs import *
from stock_env.common.evaluation import evaluate_agent, play_an_episode
from stock_env.algos.agent import MetaAgent
from stock_env.common.common_utils import open_config
from stock_env.algos.maml import adapt, get_task_loss
from stock_env.algos.buffer import RolloutBuffer

In [22]:
def evaluation_results(args, agent, envs, buffer, results, task, adapt_step, model_type, params):
    """Evaluate `agent` with `params`, run one adaptation step, and log the outcome.

    The function works in two phases:

    1. Load `params` into `agent`, switch the agent and the environments to
       evaluation mode, and call `evaluate_agent` for `args.n_eval_episodes`
       episodes.
    2. Switch both back to training mode, clear gradients, and call `adapt`
       with ``n_adapt_steps=1``.

    Note that the evaluation happens *before* the adaptation step, so the
    logged `mean`/`std` describe `params` as passed in, while the logged
    `inner_loss` comes from the adaptation step performed afterwards.

    Args:
        args: Configuration object; `n_eval_episodes` is read here and the
            whole object is forwarded to `adapt`.
        agent: Model exposing `load_state_dict`, `eval`, `train`, `zero_grad`.
        envs: Environment wrapper exposing `train(flag)`.
        buffer: Rollout buffer forwarded to `adapt`.
        results: Dict of lists with keys 'task', 'n_adapt_steps',
            'model_type', 'mean', 'std' and 'inner_loss'; mutated in place.
        task: Task identifier stored in the 'task' column.
        adapt_step: Value stored in the 'n_adapt_steps' column.
        model_type: Label stored in the 'model_type' column.
        params: State dict loaded into `agent` before evaluation.

    Returns:
        Tuple ``(new_params, results)`` where `new_params` is the first value
        returned by `adapt` and `results` is the same dict that was passed in.
    """
    # Phase 1: score the agent with the supplied parameters.
    agent.load_state_dict(params)
    agent.eval()
    envs.train(False)
    # NOTE(review): presumably mean/std of the episode reward -- confirm
    # against `evaluate_agent`.
    score_mean, score_std = evaluate_agent(
        agent, envs, n_eval_episodes=args.n_eval_episodes
    )

    # Phase 2: take exactly one adaptation step from those parameters.
    agent.train()
    envs.train()
    agent.zero_grad()
    adapted_params, adapt_loss = adapt(args, agent, envs, buffer, n_adapt_steps=1)

    # Append one row to the column-oriented results table.
    row = {
        'task': task,
        'n_adapt_steps': adapt_step,
        'model_type': model_type,
        'mean': score_mean,
        'std': score_std,
        'inner_loss': adapt_loss,
    }
    for column, value in row.items():
        results[column].append(value)

    return adapted_params, results

def plot_mean_reward(fig, df, fill_color, line_color, name):
    """Add a mean curve and a shaded mean +/- std band to `fig`.

    Three scatter traces are added, in this order: the lower band edge
    (`mean-std`), the upper band edge (`mean+std`, filled down to the
    previous trace), and the mean curve itself. Only the mean curve passes a
    `name`; both band edges are hidden from the legend.

    Args:
        fig: Figure object exposing `add_trace`.
        df: Table with columns 'n_adapt_steps', 'mean', 'mean-std' and
            'mean+std'.
        fill_color: Color used for the two band-edge lines.
        line_color: Color of the mean curve.
        name: Legend label of the mean curve.
    """
    steps = df['n_adapt_steps']

    # Order matters: 'tonexty' fills towards the trace added just before it,
    # so the lower edge has to be added first.
    trace_specs = (
        dict(
            y=df['mean-std'],
            mode='lines',
            fill=None,
            line=dict(width=0.5, color=fill_color),
            showlegend=False,
        ),
        dict(
            y=df['mean+std'],
            mode='lines',
            fill='tonexty',
            line=dict(width=0.5, color=fill_color),
            showlegend=False,
        ),
        dict(
            y=df['mean'],
            fill=None,
            line_color=line_color,
            name=name,
        ),
    )

    for spec in trace_specs:
        fig.add_trace(go.Scatter(x=steps, **spec))

In [None]:
# --- Setup: test environments, rollout buffer and the agents to compare ---

# Checkpoint holding the meta-trained agent's state dict.
meta_state_dict_path = '../model/maml_minifaang_20221214_160142.pth'
device = th.device("cuda" if th.cuda.is_available() else "cpu")

# Environments the agents are adapted to and evaluated on.
test_env_id = 'MiniVNStock-v0'
test_args = open_config('../configs/maml.yaml', env_id=test_env_id)
test_envs = MetaVectorEnv([lambda: gym.make(test_env_id) for _ in range(test_args.num_envs)])
# The rollout buffer is sized from the first env's data loader.
num_steps = test_envs.envs[0].data_loader.max_episode_steps
test_buffer = RolloutBuffer(num_steps, test_envs, device=device, gamma=test_args.gamma, gae_lambda=test_args.gae_lambda)

# A single-env vector of the second env id, used only to construct the agents.
# NOTE(review): presumably this is the env the checkpoint was trained on -- confirm.
env_id = 'MiniFAANG-v0'
args = open_config('../configs/maml.yaml', env_id=env_id)
_env = MetaVectorEnv([lambda: gym.make(env_id) for _ in range(1)])

# random agent: freshly constructed, no checkpoint loaded (baseline)
random_agent = MetaAgent(_env)

# meta agent: same architecture, weights restored from the checkpoint.
# `map_location` lets a checkpoint saved from CUDA tensors be deserialized on
# a CPU-only machine; without it `th.load` fails there even though `device`
# above already falls back to CPU.
meta_agent = MetaAgent(_env)
meta_agent.load_state_dict(th.load(meta_state_dict_path, map_location=device))

# pretrained agent
# NOTE(review): no pretrained agent is built in this notebook; the comparison
# below uses the random agent instead.

In [None]:
from copy import deepcopy

# `pd` is used below to tabulate the results but is not imported anywhere
# else in this notebook; import it here so the cell does not raise NameError.
import pandas as pd

# Number of (evaluate, adapt) rounds run for each agent.
TOTAL_ADAPT_STEPS = 5

# Pick one task and pin all test environments to it.
task = test_envs.sample_task(1)[0]
print(f"Ticker: {task}")
test_envs.reset_task(task)

# Column-oriented log filled by `evaluation_results`; one row per
# (model_type, adaptation round).
results = {
    'task': [],
    'n_adapt_steps': [],
    'model_type': [],
    'mean': [],
    'std': [],
    'inner_loss': [],
}

# Adapt deep copies so `meta_agent` / `random_agent` themselves are not the
# objects handed to the adaptation routine.
old_params = maml_params = OrderedDict(meta_agent.meta_named_parameters())
maml_adapted_agent = deepcopy(meta_agent)

old_random_params = random_params = OrderedDict(random_agent.meta_named_parameters())
adapted_random_agent = deepcopy(random_agent)

# Each call evaluates with the current parameters *before* adapting, so the
# row logged at round `i` is the score after `i` adaptation steps. The
# parameters returned by one round are the input of the next.
for i in range(TOTAL_ADAPT_STEPS):
    # MAML
    maml_params, results = evaluation_results(test_args, maml_adapted_agent, test_envs, test_buffer, results, task, i, 'maml', maml_params)

    # random
    random_params, results = evaluation_results(test_args, adapted_random_agent, test_envs, test_buffer, results, task, i, 'random', random_params)

# preprocess for plotting: add the band edges and split per model type
df_result = pd.DataFrame(results)
df_result['mean-std'] = df_result['mean'] - df_result['std']
df_result['mean+std'] = df_result['mean'] + df_result['std']
maml_df = df_result[df_result.model_type == 'maml']
random_df = df_result[df_result.model_type == 'random']

In [39]:
# Build the comparison figure: one mean +/- std band per model type.
fig = go.Figure()

# Figure size and centered title, set in a single layout update.
fig.update_layout(
    width=500,
    height=500,
    title='Mean Reward +/- Std vs Number of Adaptation Steps',
    title_x=0.5,
)

# One tick per adaptation step on x; the x range ends at the last round index.
fig.update_xaxes(
    title_text='Number of Adaptation Steps',
    dtick=1,
    range=[0, TOTAL_ADAPT_STEPS-1],
)
fig.update_yaxes(title_text='Mean Reward +/- Std', dtick=100)

# (data, band color, curve color, legend label) for each model, in draw order.
curve_specs = (
    (maml_df, 'lightcoral', 'red', 'MAML'),
    (random_df, 'lightskyblue', 'blue', 'Random'),
)
for curve_df, band_color, curve_color, label in curve_specs:
    plot_mean_reward(fig, curve_df, fill_color=band_color, line_color=curve_color, name=label)

fig.show()