In [1]:
from constants import *
from utils import evaluate_model_policy, plot_study, plot_fig
from trainer import get_trained_model
import optuna
from environment import StreetFighterEnv
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from actor_critic import A2CCNNPolicy
from feature_extractors import CNNExtractorWithAttention, CNNExtractor
from tuner import Tuner
import os

# FRAME SIZE = 4

In [2]:
# Shared settings for the tuning cells below.
TIMESTEPS = 1_000_000  # forwarded to Tuner as `timesteps`
N_TRIALS = 20          # forwarded to tune_study as `n_trials`
FRAME_SIZE = 4         # forwarded to Tuner and the feature extractor as `frame_size`

# Display options handed to `plot.show(...)` when rendering the study figures.
PLOTLY_CONFIG = dict(staticPlot=True)

In [None]:
# Hyperparameter study for A2C using the current FRAME_SIZE.
# `model_dir` is handed to the Tuner as its `save_dir`.
model_dir = 'models/with_bias_frame_4'
model = A2C
policy_network = A2CCNNPolicy
env = StreetFighterEnv()

# Feature-extractor settings; `frame_size` mirrors the value given to the Tuner.
policy_kwargs = {
    "features_extractor_class": CNNExtractorWithAttention,
    "features_extractor_kwargs": {"frame_size": FRAME_SIZE, "features_dim": 512},
}

tuner = Tuner(
    model=model,
    env=env,
    policy_network=policy_network,
    policy_args=policy_kwargs,
    frame_size=FRAME_SIZE,
    timesteps=TIMESTEPS,
    save_dir=model_dir,
)

# Run the search; the final tuple is what the cell displays.
study = tuner.tune_study(n_trials=N_TRIALS)
(study.best_trial.number, study.best_params)

[32m[I 2022-04-17 03:36:48,005][0m A new study created in memory with name: no-name-df1e9e8e-86fe-49cf-9951-ce748b73be72[0m


  0%|          | 0/20 [00:00<?, ?it/s]

[32m[I 2022-04-17 04:46:08,417][0m Trial 0 finished with value: 0.0 and parameters: {'gamma': 0.9610590868182427, 'learning_rate': 1.2730024504817727e-05, 'gae_lambda': 0.8358271893298}. Best is trial 0 with value: 0.0.[0m
[32m[I 2022-04-17 05:54:32,048][0m Trial 1 finished with value: 6800.0 and parameters: {'gamma': 0.9892997854860126, 'learning_rate': 7.189161693024792e-05, 'gae_lambda': 0.9514619233779469}. Best is trial 1 with value: 6800.0.[0m
[32m[I 2022-04-17 07:00:54,050][0m Trial 2 finished with value: 1000.0 and parameters: {'gamma': 0.9608380585220108, 'learning_rate': 2.9617795121505266e-05, 'gae_lambda': 0.9096156803402182}. Best is trial 1 with value: 6800.0.[0m
[32m[I 2022-04-17 08:06:12,491][0m Trial 3 finished with value: 2000.0 and parameters: {'gamma': 0.9659929325597487, 'learning_rate': 5.07384069776803e-05, 'gae_lambda': 0.9083976132228462}. Best is trial 1 with value: 6800.0.[0m
[32m[I 2022-04-17 09:12:13,210][0m Trial 4 finished with value: 2000.0

In [None]:
# Show every figure returned by the project helper `plot_study` for the
# current `study`, using the "notebook" renderer and the shared config.
for figure in plot_study(study):
    figure.show("notebook", config=PLOTLY_CONFIG)

# FRAME SIZE = 1

In [None]:
# Rebind the frame size for the second study; the following tuning cell reads
# FRAME_SIZE for both the feature-extractor kwargs and the Tuner.
# NOTE(review): this overrides the value from the configuration cell, so
# re-running the earlier tuning cell afterwards would pick up 1 instead of 4.
FRAME_SIZE = 1

In [None]:
# Hyperparameter study for A2C using the current FRAME_SIZE.
# `model_dir` is handed to the Tuner as its `save_dir`.
model_dir = 'models/with_bias_frame_1'
model = A2C
policy_network = A2CCNNPolicy
env = StreetFighterEnv()

# Feature-extractor settings; `frame_size` mirrors the value given to the Tuner.
policy_kwargs = {
    "features_extractor_class": CNNExtractorWithAttention,
    "features_extractor_kwargs": {"frame_size": FRAME_SIZE, "features_dim": 512},
}

tuner = Tuner(
    model=model,
    env=env,
    policy_network=policy_network,
    policy_args=policy_kwargs,
    frame_size=FRAME_SIZE,
    timesteps=TIMESTEPS,
    save_dir=model_dir,
)

# Run the search; the final tuple is what the cell displays.
study = tuner.tune_study(n_trials=N_TRIALS)
(study.best_trial.number, study.best_params)

In [None]:
# Show every figure returned by the project helper `plot_study` for the
# current `study`, using the "notebook" renderer and the shared config.
for figure in plot_study(study):
    figure.show("notebook", config=PLOTLY_CONFIG)