In [1]:
# Third-party dependencies: PyTorch (used for device detection) and the
# Stable-Baselines3 DDPG agent. NOTE(review): numpy is not referenced in the
# visible cells.
import torch
import numpy as np
from stable_baselines3 import DDPG


# NOTE(review): shutil is not referenced in the visible cells.
import os, shutil, sys
# Put the project's ./env and ./common directories on the module search path.
sys.path.append(os.path.abspath('./env'))
sys.path.append(os.path.abspath('./common'))

from env.env_move_sector  import HumanMoveSectorAction

import common.mlflow_sb3_helper as mlf
import common.tensorboard_sb3_helper as tnb
# Logging backend switch: True selects the MLflow server helper,
# False selects the TensorBoard helper.
USE_MLFLOW = False

from pytz import timezone
from datetime import datetime

# Timezone used when stamping experiment names and deriving per-run seeds.
TZ = timezone('Europe/Moscow')
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [11]:
# Central experiment configuration. Later cells fill in 'env_name',
# 'algorithm_name', 'exp_id', 'seed' and 'options' before each run and pass the
# whole dict to the logging helper as the run parameters.
exp_params = dict(
    env_name='',
    algorithm_name='',
    exp_id=0,
    # Experiment label stamped with the creation time (day, month, H/M/S).
    exp_name='exp_' + datetime.now(TZ).strftime("%d%m_%H%M%S"),
    seed=21,
    # Network layout. NOTE(review): not read by any visible cell (the
    # policy_kwargs argument is commented out) - confirm it is still needed.
    net=dict(
        activation='ReLU',
        pi=[256, 256],
        qf=[256, 256],
        vf=[256, 256],
    ),
    # Only 'episode_count' is read by the visible training loops; the other
    # hyperparameters are referenced solely by the commented-out DDPG constructor.
    training=dict(
        iteration_count=1,
        episode_count=59_000,
        policy='MlpPolicy',
        learning_rate=0.003,
        buffer_size=1_500_000,
        learning_starts=100,
        batch_size=1024,
        tau=0.005,
        gamma=0.99,
        verbose=0,
        device=device,
    ),
    # Initial environment options; replaced per stage by the training loops.
    options=dict(
        finish_dist=100,
        start_dist=110,
        delta_phi=0,
    ),
    # Forwarded to learn_and_fix as checkpoint_interval / log_interval.
    validation=dict(
        validate_agent_every_n_eps=10_000,
        log_interval=10,
    ),
    # Number of repeated runs performed by the "DDPG_<i>" loops.
    evaluation=dict(
        episode_count=3,
    ),
)


# Environment option sets for the first training pass, in execution order.
# Each key doubles as the run name. Every set uses start_dist=110; only
# finish_dist and delta_phi vary between stages.
option_var = {
    f'DDPG {finish_dist}_{delta_phi}': {
        'finish_dist': finish_dist,
        'start_dist': 110,
        'delta_phi': delta_phi,
    }
    for finish_dist, delta_phi in (
        (100, 0),
        (80, 0),
        # (80, 20),  # stage 'DDPG 80_20' is currently disabled
        (80, 45),
        (80, 90),
        (80, 135),
        (80, 180),
        (60, 180),
        (40, 180),
        (20, 180),
        (10, 180),
    )
}

# Environment option sets for the second training pass, in execution order.
# finish_dist=5 and delta_phi=180 are fixed; start_dist grows from 120 to 200
# in steps of 10. The start_dist=110 stage ('DDPG 5_110_180') is disabled.
option_var2 = {
    f'DDPG 5_{start_dist}_180': {
        'finish_dist': 5,
        'start_dist': start_dist,
        'delta_phi': 180,
    }
    for start_dist in range(120, 201, 10)
}

In [4]:
# Initial environment options come from the experiment configuration.
# (Assign None here instead to construct the environments without options.)
options = exp_params['options']

# Environment instance handed to DDPG.load in the training loops.
env = HumanMoveSectorAction(
    continuous=True,
    target_point_rand=False,
    object_ignore=True,
    options=options,
)
# Same configuration with 'rgb_array' rendering; this instance is the one
# passed to the logging helper's learn_and_fix.
env_render = HumanMoveSectorAction(
    continuous=True,
    target_point_rand=False,
    object_ignore=True,
    render_mode='rgb_array',
    options=options,
)


In [3]:
# Artifact locations collected by the training loops, keyed by run.
log_path = {}

# Pick the logging backend according to the USE_MLFLOW switch.
log_helper = (
    mlf.MLflowServerHelper("http://192.168.0.206:2670", False)
    if USE_MLFLOW
    else tnb.TensorboardHelper("", False)
)

In [None]:
# !!! New experiment: run this cell to register a fresh experiment
# (alternative to the "continue an old experiment" cell).
exp_params['env_name'] = env.name()
# Experiment name has the form env_<environment name>_<timestamped label>.
exp_name = '_'.join(['env', exp_params['env_name'], exp_params['exp_name']])
experiment_id = log_helper.new_experiment(exp_name)


In [None]:
# !!! Continue training within an existing experiment
# (alternative to the "new experiment" cell).
# Identifier of the experiment to continue; hard-coded for this session.
experiment_id = 297
# Look the experiment up through the logging helper by its id.
exp_name = log_helper.get_experiment(experiment_id)

print(exp_name)

In [None]:
exp_params['exp_id'] = experiment_id

# Marks the first stage, which starts from the pre-trained checkpoint.
b_first = True
model = None

for name, option in option_var.items():
    print(name)

    # Record this stage's settings; exp_params is passed to learn_and_fix below.
    # The seed is the current wall-clock time (HHMMSS) read as an integer.
    exp_params.update(
        env_name=env.name(),
        algorithm_name=name,
        seed=int(datetime.now(TZ).strftime("%H%M%S")),
        options=option,
    )

    # Apply the stage's options to both environment instances.
    env.set_options(option)
    env_render.set_options(option)

    if not b_first:
        # Later stages load '<exp_name>/model.zip', where exp_name is the value
        # returned by the previous learn_and_fix call (presumably the model it
        # saved - confirm against the helper).
        print(exp_name)
        model = DDPG.load(f'{exp_name}/model.zip', env=env, device=device)
    else:
        b_first = False
        model = DDPG.load('./teached/sector_move_DDPG.zip', env=env, device=device)
        # To train from scratch instead of loading, build the agent as
        #   DDPG(exp_params['training']['policy'], env, device=device, ...)
        # taking learning_rate, buffer_size, learning_starts, batch_size, tau,
        # gamma and verbose from exp_params['training'].

    # Note: this rebinds exp_name, which the next iteration reads.
    art_loc, exp_name, run_id = log_helper.learn_and_fix(
        model=model,
        env=env_render,
        run_name=name,
        episode_count=exp_params['training']['episode_count'],
        parameters=exp_params,
        experiment_id=experiment_id,
        checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
        log_interval=exp_params['validation']['log_interval'],
    )

    # Remember where this stage's model artifact is stored.
    log_path[name] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'




In [None]:
exp_params['exp_id'] = experiment_id
exp_params['algorithm_name'] = 'DDPG'

# Marks the first run, which starts from the pre-trained checkpoint.
b_first = True
model = None

# The run count is taken from the 'evaluation' section of the configuration.
for i in range(exp_params['evaluation']['episode_count']):
    # The seed is the current wall-clock time (HHMMSS) read as an integer.
    exp_params.update(
        env_name=env.name(),
        seed=int(datetime.now(TZ).strftime("%H%M%S")),
    )

    if not b_first:
        # Later runs load '<exp_name>/model.zip', where exp_name is the value
        # returned by the previous learn_and_fix call (presumably the model it
        # saved - confirm against the helper).
        print(exp_name)
        model = DDPG.load(f'{exp_name}/model.zip', env=env, device=device)
    else:
        b_first = False
        model = DDPG.load('./teached/sector_move_DDPG.zip', env=env, device=device)
        # To train from scratch instead of loading, build the agent as
        #   DDPG(exp_params['training']['policy'], env, device=device, ...)
        # taking learning_rate, buffer_size, learning_starts, batch_size, tau,
        # gamma and verbose from exp_params['training'].

    # Note: this rebinds exp_name, which the next iteration reads.
    art_loc, exp_name, run_id = log_helper.learn_and_fix(
        model=model,
        env=env_render,
        run_name=f'DDPG_{i+1}',
        episode_count=exp_params['training']['episode_count'],
        parameters=exp_params,
        experiment_id=experiment_id,
        checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
        log_interval=exp_params['validation']['log_interval'],
    )

    # Remember where this run's model artifact is stored (keyed by run index).
    log_path[i] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'




In [None]:
# Display the artifact locations collected so far.
log_path

In [6]:
# Manually pin the artifact to continue from, replacing whatever the training
# loops collected in log_path.
log_path = {}
log_path['DDPG'] = 'mlflow-artifacts:/368/635a3b59e18f46d4be8fc7d5e2e60e77/artifacts/env_MoveSector_IgnoreObst_exp_1310_121241/sb3/model.zip'

In [None]:
# Experiment whose artifacts are downloaded; hard-coded for this session.
experiment_id = 368
# Local download folder, named after the experiment id (relative to the
# notebook's working directory).
load_path = os.path.join(str(experiment_id))
# Create the folder if it is missing. exist_ok=True makes the cell safe to
# re-run and avoids the check-then-create race of isdir() followed by mkdir().
os.makedirs(load_path, exist_ok=True)
load_path

In [None]:
# Maps each model name from log_path to its local destination folder
# (the stored folder string ends with a trailing slash).
# NOTE(review): "loadel_models" looks like a typo for "loaded_models", but
# later cells read the dict under this exact name.
loadel_models = {}

for model_name, path in log_path.items():
    # Destination: <load_path>/<model_name>/
    loadel_models[model_name] =  f'{load_path}/{model_name}/'
    # Presumably fetches the artifact at `path` into that folder - confirm
    # against the logging helper.
    log_helper.load_artifact(path,loadel_models[model_name])

In [9]:
# Rebuild both environment instances for the second training pass.
# Stable-Baselines3 DDPG only supports continuous (Box) action spaces, so
# continuous actions are requested explicitly - as the first env-construction
# cell does - instead of relying on the constructor's default.
# No options are passed here; the option_var2 loop applies them through
# set_options() before each stage.
env = HumanMoveSectorAction(
    continuous=True,
    target_point_rand=False,
    object_ignore=True,
)
env_render = HumanMoveSectorAction(
    continuous=True,
    target_point_rand=False,
    object_ignore=True,
    render_mode='rgb_array',
)

In [None]:
# Register a fresh experiment for the second pass
# (alternative to the "continue in the OLD experiment" cell).
exp_params['env_name'] = env.name()
# Experiment name has the form env_<environment name>_<timestamped label>.
exp_name = '_'.join(['env', exp_params['env_name'], exp_params['exp_name']])
experiment_id = log_helper.new_experiment(exp_name)
exp_params['exp_id'] = experiment_id
print(exp_name)

In [None]:
# !!! Continue training in the OLD experiment
# (alternative to registering a new experiment for the second pass).
# Uses whatever experiment_id is currently set in the notebook.
exp_name = log_helper.get_experiment(experiment_id)
print(exp_name)
# Keep the logged run parameters in sync with the selected experiment.
exp_params['exp_id'] = experiment_id
exp_params['env_name'] = env.name()

In [None]:

exp_params['algorithm_name'] = 'DDPG'
exp_params['env_name'] = env.name()

# Local folder holding the downloaded 'DDPG' artifact.
path = loadel_models['DDPG']
print(path)
# Rebound by the loop below; keeps this value only if option_var2 is empty.
name = 'DDPG'

# Marks the first stage, which starts from the downloaded model.
is_first = True

for name, option in option_var2.items():
    print(name)

    # The seed is the current wall-clock time (HHMMSS) read as an integer.
    exp_params.update(
        seed=int(datetime.now(TZ).strftime("%H%M%S")),
        options=option,
    )

    # Apply the stage's options to both environment instances.
    env.set_options(option)
    env_render.set_options(option)

    model = None
    if not is_first:
        # Later stages load '<exp_name>/model.zip', where exp_name is the value
        # returned by the previous learn_and_fix call (presumably the model it
        # saved - confirm against the helper).
        model = DDPG.load(f'{exp_name}/model.zip', env=env, device=device)
    else:
        is_first = False
        model = DDPG.load(f'{path}/model.zip', env=env, device=device)

    # Note: this rebinds exp_name, which the next iteration reads.
    art_loc, exp_name, run_id = log_helper.learn_and_fix(
        model=model,
        env=env_render,
        run_name=name,
        episode_count=exp_params['training']['episode_count'],
        parameters=exp_params,
        experiment_id=experiment_id,
        checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
        log_interval=exp_params['validation']['log_interval'],
    )

    # Remember where this stage's model artifact is stored.
    log_path[name] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'



In [None]:
exp_params['algorithm_name'] = 'DDPG'

# Local folder holding the downloaded 'DDPG' artifact.
path = loadel_models['DDPG']
print(path)
name = 'DDPG'

# Marks the first run, which starts from the downloaded model.
is_first = True

# The run count is taken from the 'evaluation' section of the configuration.
# The environments keep whatever options were applied last; this cell does not
# call set_options().
for i in range(exp_params['evaluation']['episode_count']):

    # The seed is the current wall-clock time (HHMMSS) read as an integer.
    exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))

    # One label per run, used both as the run name and as the log_path key.
    run_name = f'{name}_{i+1}'

    model = None
    if is_first:
        is_first = False
        model = DDPG.load(f'{path}/model.zip', env=env, device=device)
    else:
        # Later runs load '<exp_name>/model.zip', where exp_name is the value
        # returned by the previous learn_and_fix call (presumably the model it
        # saved - confirm against the helper).
        model = DDPG.load(f'{exp_name}/model.zip', env=env, device=device)

    # Note: this rebinds exp_name, which the next iteration reads.
    art_loc, exp_name, run_id = log_helper.learn_and_fix(
        model=model,
        env=env_render,
        run_name=run_name,
        episode_count=exp_params['training']['episode_count'],
        parameters=exp_params,
        experiment_id=experiment_id,
        checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
        log_interval=exp_params['validation']['log_interval'],
    )

    # Key by the per-run name. Keying by the constant 'DDPG' made every run
    # overwrite the previous one and clobbered the source 'DDPG' entry.
    log_path[run_name] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'

