In [2]:
import torch
import numpy as np
from stable_baselines3 import PPO

import os, shutil, sys
sys.path.append(os.path.abspath('./env'))
sys.path.append(os.path.abspath('./common'))


from env_find_path import FindPath
import common.mlflow_sb3_helper as mlf
import common.tensorboard_sb3_helper as tnb
# Selects the logging backend used below: True -> mlf.MLflowServerHelper,
# False -> tnb.TensorboardHelper.
USE_MLFLOW = False

from pytz import timezone
from datetime import datetime

# Timezone for the timestamps embedded in experiment names and for the
# time-derived seeds generated in the training loops.
TZ = timezone('Europe/Moscow')

# Lookup table: activation name stored in exp_params -> torch module class.
activations = {
    'ReLU': torch.nn.ReLU,
    'Tanh': torch.nn.Tanh,
}

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

In [3]:
# Artifact locations of trained models, filled in by the training loops.
log_path = {}

# Create the logging helper for the backend selected by USE_MLFLOW.
# The name is bound only once, directly to the helper object: a dict
# placeholder would make later cells fail with a confusing AttributeError
# if helper construction raised.
if USE_MLFLOW:
    log_helper = mlf.MLflowServerHelper("http://192.168.0.206:2670", False)
else:
    log_helper = tnb.TensorboardHelper("", False)

In [3]:
# Constructor arguments shared by the training and the rendering environment.
env_kwargs = dict(target_point_rand=False, area_size=20)

# `env` is handed to the PPO model; `env_render` (rgb_array rendering) is
# handed to the logging helper.
env = FindPath(**env_kwargs)
env_render = FindPath(render_mode='rgb_array', **env_kwargs)

In [4]:
# Experiment configuration. The whole dict is passed to the logging helper as
# the run parameters; individual entries are read by the training cells.
exp_params = {
    # Placeholders overwritten by later cells before training starts.
    'env_name': '',
    'algorithm_name': '',
    'exp_id': 0,
    # Default experiment name: "exp_" + day/month and time of cell execution.
    'exp_name': f'exp_{datetime.now(TZ).strftime("%d%m_%H%M%S")}',
    # Replaced by a time-derived value at the start of every training run.
    'seed': 21,
    'net': {
        'activation': 'Tanh',   # key into the `activations` table
        'pi': [1024, 512],
        'qf': [256, 256],
        'vf': [1024, 512],
    },
    'training': {
        'iteration_count': 10,      # number of consecutive runs per loop
        'episode_count': 99_000,    # forwarded to learn_and_fix
        'policy': 'MlpPolicy',
        'buffer_size': 1_500_000,   # NOTE(review): not read by the PPO cells visible here
        'batch_size': 256,
        'gamma': 0.95,
        'verbose': 0,
        'device': device,
    },
    'validation': {
        'validate_agent_every_n_eps': 50_000,  # used as checkpoint_interval
        'log_interval': 10,
    },
    'evaluation': {
        'episode_count': 1,
    },
}

# Policy keyword arguments derived from the 'net' section above.
net_cfg = exp_params['net']
policy_kwargs = {
    'activation_fn': activations[net_cfg['activation']],
    'net_arch': {
        'pi': net_cfg['pi'],
        'qf': net_cfg['qf'],
        'vf': net_cfg['vf'],
    },
}




In [None]:
#!!!!! New experiment
# Record the environment name, then register an experiment called
# "env_<env name>_<exp name>" with the logging helper.
exp_params['env_name'] = env.name()
exp_name = '_'.join(('env', exp_params['env_name'], exp_params['exp_name']))
experiment_id = log_helper.new_experiment(exp_name)

In [None]:
#!!!!! Continue training inside an existing (old) experiment
# Id of the existing experiment to attach the new runs to.
experiment_id = 161
exp_params['env_name'] = env.name()

# Look the experiment up by id and show what the helper returned.
exp_name = log_helper.get_experiment(experiment_id)
print(exp_name)

In [None]:
# Train `iteration_count` consecutive PPO runs inside the selected experiment.
# The first run starts from a freshly constructed model; every later run
# reloads `<exp_name>/model.zip`, where exp_name is the value returned by the
# previous learn_and_fix call (presumably the helper saves the model there --
# confirm against the helper implementation).
exp_params['exp_id'] = experiment_id

name = "PPO"
exp_params['algorithm_name'] = name

for i in range(exp_params['training']['iteration_count']):

    # Time-derived seed (HHMMSS read as an integer). It is logged with the run
    # parameters, so it is also applied to the model below; otherwise the
    # logged value would not describe the run.
    exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))

    if i == 0:
        model = PPO(exp_params['training']['policy'],
                    env,
                    policy_kwargs=policy_kwargs,
                    batch_size=exp_params['training']['batch_size'],
                    gamma=exp_params['training']['gamma'],
                    verbose=exp_params['training']['verbose'],
                    seed=exp_params['seed'],
                    device=device,
                    )
    else:
        model = PPO.load(f'{exp_name}/model.zip', env=env, device=device)
        # A loaded model keeps the seed it was saved with; re-seed it so the
        # run matches the seed logged for this iteration.
        model.set_random_seed(exp_params['seed'])

    art_loc, exp_name, run_id = log_helper.learn_and_fix(
                                model=model,
                                env=env_render,
                                run_name=f'{name}_{i+1}',
                                episode_count=exp_params['training']['episode_count'],
                                parameters=exp_params,
                                experiment_id=experiment_id,
                                checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
                                log_interval=exp_params['validation']['log_interval'],
                                video_episods=3,
                                video_fps=5
                                )
    # Remember where this run's model artifact is stored, keyed by run index.
    log_path[i] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'


In [None]:
# Display the artifact locations collected by the training loop.
log_path

In [5]:
# Experiment whose stored model is used as the starting point for the
# continuation cells below.
experiment_id = 484

# Rebind log_path (dropping any entries from the training loop above):
# algorithm name -> artifact URI of the stored model.
log_path = {}
log_path['PPO'] = f'mlflow-artifacts:/{experiment_id}/e18ac8164e2d4da3af0189d828e6fbdf/artifacts/env_FindPath_exp_2210_172357/sb3/model.zip'

In [None]:
# Local working directory named after the experiment id; the stored models
# are placed below it. makedirs(exist_ok=True) creates it only when missing,
# without a separate check-then-create step.
load_path = str(experiment_id)
os.makedirs(load_path, exist_ok=True)
load_path

In [None]:

# Algorithm name -> local directory "<load_path>/<name>/" that is passed to
# the helper's load_artifact call for that model.
loadel_models = {}

for model_name, path in log_path.items():
    target_dir = f'{load_path}/{model_name}/'
    loadel_models[model_name] = target_dir
    # Presumably fetches the artifact at `path` into target_dir -- confirm
    # against the helper implementation.
    log_helper.load_artifact(path, target_dir)

In [8]:
# Re-create both environments for the continuation runs: same settings for
# each, with rgb_array rendering enabled only on `env_render`.
env_kwargs = dict(target_point_rand=False, area_size=20)
env = FindPath(**env_kwargs)
env_render = FindPath(render_mode='rgb_array', **env_kwargs)

In [None]:
#!!! Continue training in a NEW experiment
# Refresh the environment name and the timestamped experiment name.
exp_params['env_name'] = env.name()
exp_params['exp_name'] = f'exp_{datetime.now(TZ).strftime("%d%m_%H%M%S")}'

# Register "env_<env name>_<exp name>" and store the id the helper returns.
exp_name = '_'.join(('env', exp_params['env_name'], exp_params['exp_name']))
experiment_id = log_helper.new_experiment(exp_name)
exp_params['exp_id'] = experiment_id
print(exp_name)


In [None]:
#!!! Continue training in the OLD experiment
# Look up the experiment for the current experiment_id and show what the
# helper returned.
exp_name = log_helper.get_experiment(experiment_id)
print(exp_name)

In [None]:

# Continue training from a previously stored PPO model.
# The first run loads the model from the local directory recorded in
# loadel_models; every later run reloads `<exp_name>/model.zip`, where
# exp_name is the value returned by the previous learn_and_fix call
# (presumably the helper saves the model there -- confirm against the helper).
name = "PPO"
path = loadel_models[name]

exp_params['exp_id'] = experiment_id
exp_params['algorithm_name'] = name

for i in range(exp_params['training']['iteration_count']):

    # Time-derived seed (HHMMSS read as an integer), logged with the run.
    exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))

    if i == 0:
        print(path)
        # `path` already ends with a separator; os.path.join avoids the
        # doubled slash that plain string formatting would produce.
        model = PPO.load(os.path.join(path, 'model.zip'), env=env, device=device)
    else:
        print(exp_name)
        model = PPO.load(f'{exp_name}/model.zip', env=env, device=device)

    # A loaded model keeps the seed it was saved with; re-seed it so the run
    # matches the seed logged for this iteration.
    model.set_random_seed(exp_params['seed'])

    art_loc, exp_name, run_id = log_helper.learn_and_fix(
                                model=model,
                                env=env_render,
                                run_name=f'{name}_{i+1}',
                                episode_count=exp_params['training']['episode_count'],
                                parameters=exp_params,
                                experiment_id=experiment_id,
                                checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
                                log_interval=exp_params['validation']['log_interval'],
                                video_episods=3,
                                video_fps=5
                                )
    # Remember where this run's model artifact is stored, keyed by run index.
    log_path[i] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'

In [11]:
# Delete the local working directory and everything stored below it.
shutil.rmtree(load_path)