In [1]:
import torch
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.noise import NormalActionNoise

import os, shutil, sys
# Make the project-local `env` and `common` directories importable.
for _local_dir in ('./env', './common'):
    sys.path.append(os.path.abspath(_local_dir))

from env.env_simple_move  import HumanMoveSimpleAction
import common.mlflow_sb3_helper as mlf

from pytz import timezone
from datetime import datetime

# Time zone used when stamping experiment names and deriving seeds.
TZ = timezone('Europe/Moscow')

# Run on the GPU when PyTorch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [2]:
# Experiment configuration. The whole dict is handed to
# `mlflow_server.learn_and_fix(parameters=...)` by the training cells.
exp_params = dict(
    # Filled in by later cells before each run.
    env_name='',
    algorithm_name='',
    exp_id=0,
    # Timestamped tag (day, month, time in TZ) used to build the experiment name.
    exp_name=f'exp_{datetime.now(TZ).strftime("%d%m_%H%M%S")}',
    seed=21,
    # Network description.
    # NOTE(review): nothing visible in this notebook passes it to the model — confirm it is used.
    net=dict(
        activation='ReLU',
        pi=[256, 256],
        qf=[256, 256],
        vf=[256, 256],
    ),
    # Hyper-parameters read by the training cells when building the DQN model.
    training=dict(
        iteration_count=1,
        episode_count=39000,
        policy='MlpPolicy',
        learning_rate=0.003,
        buffer_size=1500000,
        learning_starts=100,
        batch_size=256,
        tau=0.005,
        gamma=0.99,
        verbose=0,
        device=device,
        # Epsilon-greedy schedule; replaced per run with an entry of `eps_var`.
        DQN=dict(
            fraction_eps=0.6,
            start_eps=1.0,
            final_eps=0.3,
        ),
    ),
    # Environment options passed to HumanMoveSimpleAction.
    options=dict(
        finish_dist=100,
        start_dist=110,
        delta_phi=0,
    ),
    validation=dict(
        validate_agent_every_n_eps=20000,
        log_interval=10,
    ),
    evaluation=dict(
        episode_count=1,
    ),
)

# Named epsilon-greedy presets 'DQN1'..'DQN6', listed as
# (fraction_eps, start_eps, final_eps) in preset order.
eps_var = {
    f'DQN{preset_no}': {
        'fraction_eps': fraction,
        'start_eps': start,
        'final_eps': final,
    }
    for preset_no, (fraction, start, final) in enumerate(
        (
            (0.8, 1.0, 0.3),
            (0.4, 1.0, 0.3),
            (0.1, 1.0, 0.3),
            (0.1, 1.0, 0.1),
            (0.1, 0.3, 0.05),
            (0., 0.05, 0.05),
        ),
        start=1,
    )
}


# Environment option sets keyed by run name 'DQN <finish_dist>_<delta_phi>'.
# Every set uses start_dist=110; the training loop iterates them in this order.
option_var = {
    f'DQN {finish_dist}_{delta_phi}': {
        'finish_dist': finish_dist,
        'start_dist': 110,
        'delta_phi': delta_phi,
    }
    for finish_dist, delta_phi in (
        # (100, 0),  # disabled
        (80, 0),
        (80, 20),
        (80, 45),
        (80, 90),
        (80, 180),
        (60, 180),
        (40, 180),
        (20, 180),
        (10, 180),
    )
}

In [3]:
# Initial environment options come from the experiment configuration.
options = exp_params['options']
#options=None

# Discrete-action environment handed to the model.
env_disc = HumanMoveSimpleAction(
    continuous=False,
    target_point_rand=False,
    options=options,
)
# Same configuration with rgb_array rendering; passed to learn_and_fix as `env`.
env_disc_render = HumanMoveSimpleAction(
    continuous=False,
    target_point_rand=False,
    render_mode='rgb_array',
    options=options,
)


In [4]:

# Helper object wrapping the MLflow server used by the rest of the notebook.
mlflow_server = mlf.MLflowServerHelper(
    "http://192.168.0.206:2670",
    False,  # NOTE(review): meaning of this flag is defined by MLflowServerHelper — confirm
)
# Run name -> artifact location of the trained model; filled by the training loop.
mlflow_path = dict()

In [5]:
#!!!!! New experiment
# The experiment name is 'env_<environment name>_<timestamped tag>'.
exp_params['env_name'] = env_disc.name()
exp_name = '_'.join(('env', exp_params['env_name'], exp_params['exp_name']))
experiment_id = mlflow_server.new_experiment(exp_name)


2024/09/21 17:46:04 INFO mlflow.tracking.fluent: Experiment with name 'env_HumanMoveSimple_exp_2109_174604' does not exist. Creating a new experiment.


In [None]:
#!!!!! Continue training in an existing (old) experiment
# Alternative to the "new experiment" cell: overrides experiment_id with a fixed id
# and looks up the matching experiment name on the server.
experiment_id = 207
exp_name = mlflow_server.get_experiment(experiment_id)

print(exp_name)

In [6]:
exp_params['exp_id'] = experiment_id

b_first = True
model = None

# Train one DQN agent through every option set in `option_var`, in order.
# The first stage builds a fresh model; each later stage continues from the
# checkpoint found at '<exp_name>/model.zip' (presumably written by
# learn_and_fix during the previous stage — confirm against the helper).
for name, option in option_var.items():

    print(name)

    exp_params['env_name'] = env_disc.name()
    exp_params['algorithm_name'] = name
    # Wall-clock derived seed (HHMMSS); it is logged AND applied to the model below.
    exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))
    # Store a copy so later in-place edits of exp_params['options'] cannot
    # silently rewrite the entries of `option_var`.
    exp_params['options'] = dict(option)

    # Stages with a finish distance above 60 use the 'DQN2' exploration preset,
    # the remaining stages use 'DQN5'.
    if option['finish_dist'] > 60:
        exp_params['training']['DQN'] = eps_var['DQN2']
    else:
        exp_params['training']['DQN'] = eps_var['DQN5']
    eps_cfg = exp_params['training']['DQN']

    env_disc.set_options(option)
    env_disc_render.set_options(option)

    if b_first:
        b_first = False
        # NOTE(review): exp_params['net'] is logged but not passed to the policy
        # (policy_kwargs is disabled), so the SB3 default architecture is used.
        model = DQN(  exp_params['training']['policy'],
                        env_disc,
                        #policy_kwargs=policy_kwargs,
                        learning_rate=exp_params['training']['learning_rate'],
                        buffer_size=exp_params['training']['buffer_size'],
                        learning_starts=exp_params['training']['learning_starts'],
                        batch_size=exp_params['training']['batch_size'],
                        tau=exp_params['training']['tau'],
                        gamma=exp_params['training']['gamma'],
                        verbose=exp_params['training']['verbose'],
                        exploration_fraction=eps_cfg['fraction_eps'],
                        exploration_initial_eps=eps_cfg['start_eps'],
                        exploration_final_eps=eps_cfg['final_eps'],
                        seed=exp_params['seed'],
                        device=device,
                        )
    else:
        print(exp_name)
        # Pass the new exploration settings to load() as keyword overrides.
        # Do NOT call model._setup_model() after loading: it rebuilds the policy
        # network with fresh random weights and would discard everything
        # learned in the previous stage. load() applies these overrides before
        # it builds the model and restores the saved parameters.
        model = DQN.load(
            f'{exp_name}/model.zip',
            env=env_disc,
            device=device,
            seed=exp_params['seed'],
            exploration_fraction=eps_cfg['fraction_eps'],
            exploration_initial_eps=eps_cfg['start_eps'],
            exploration_final_eps=eps_cfg['final_eps'],
        )

    art_loc, exp_name, run_id = mlflow_server.learn_and_fix(
                                model=model,
                                env= env_disc_render,
                                run_name=name,
                                episode_count = exp_params['training']['episode_count'],
                                parameters=exp_params,
                                experiment_id=experiment_id,
                                checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
                                log_interval=exp_params['validation']['log_interval'])

    # Remember where the stage's model artifact is stored for later download.
    mlflow_path[name] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'




DQN 80_0


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4
DQN 80_20
env_HumanMoveSimple_exp_2109_174604


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4
DQN 80_45
env_HumanMoveSimple_exp_2109_174604


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4
DQN 80_90
env_HumanMoveSimple_exp_2109_174604


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4
DQN 80_180
env_HumanMoveSimple_exp_2109_174604


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4
DQN 60_180
env_HumanMoveSimple_exp_2109_174604


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4
DQN 40_180
env_HumanMoveSimple_exp_2109_174604


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4
DQN 20_180
env_HumanMoveSimple_exp_2109_174604


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4
DQN 10_180
env_HumanMoveSimple_exp_2109_174604


Output()

Moviepy - Building video env_HumanMoveSimple_exp_2109_174604//agent.mp4.
Moviepy - Writing video env_HumanMoveSimple_exp_2109_174604//agent.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready env_HumanMoveSimple_exp_2109_174604//agent.mp4


In [None]:
# Display the artifact locations collected by the training loop.
mlflow_path

In [11]:
# Manually pinned artifact location of a previously trained DQN model.
mlflow_path = dict(
    DQN='mlflow-artifacts:/216/684a0b5a74e7466498e7f42b7b84b0c6/artifacts/env_HumanMoveSimple_exp_2009_114432/sb3/model.zip',
)

In [None]:
experiment_id = 216
# Scratch directory for downloaded artifacts, named after the experiment id and
# created relative to the current working directory.
load_path = str(experiment_id)
# exist_ok=True makes the cell safe to re-run when the directory already exists.
os.makedirs(load_path, exist_ok=True)
load_path

In [None]:
# Model name -> local directory (under `load_path`) that receives its artifact.
loadel_models = {}

for model_name, path in mlflow_path.items():
    target_dir = f'{load_path}/{model_name}/'
    loadel_models[model_name] = target_dir
    mlflow_server.load_artifact(path, target_dir)

In [13]:
exp_params['training']['episode_count'] = 99000

# Build a fresh options dict instead of editing the current one in place.
# After the training loop, exp_params['options'] may be the very same object as
# an entry of `option_var` (and may also have been handed to earlier
# environments), so an in-place edit would silently change those too.
options = {
    **exp_params['options'],
    'finish_dist': 10,
    'start_dist': 110,
    'delta_phi': 0,
}
exp_params['options'] = options

# Environments built with and without continuous=False, each with a plain
# instance and an rgb_array-rendering twin.
env = HumanMoveSimpleAction(target_point_rand=False,options=options)
env_disc = HumanMoveSimpleAction(continuous=False, target_point_rand=False,options=options)

env_render = HumanMoveSimpleAction(target_point_rand=False, render_mode='rgb_array',options=options)
env_disc_render = HumanMoveSimpleAction(continuous=False, target_point_rand=False, render_mode='rgb_array',options=options)

In [None]:
# Register a new experiment named 'env_<environment name>_<timestamped tag>'
# and record its id in the configuration.
exp_params['env_name'] = env.name()
exp_name = '_'.join(('env', exp_params['env_name'], exp_params['exp_name']))
exp_params['exp_id'] = experiment_id = mlflow_server.new_experiment(exp_name)
print(exp_name)

In [None]:
#!!! Continue training in the OLD experiment
# Uses the experiment_id already set in the kernel instead of creating a new one.
exp_name = mlflow_server.get_experiment(experiment_id)
print(exp_name)
exp_params.update(exp_id=experiment_id, env_name=env.name())

In [15]:

# Algorithm name -> True when the fine-tuning loop should skip it.
# Only 'DQN' is enabled here.
models_exclude = {
    algo: algo != 'DQN'
    for algo in ('PPO', 'DQN', 'DDPG', 'SAC', 'TD3')
}
#models_exclude = None

In [None]:
# Imported here so the cell runs on a fresh kernel: the notebook's import cell
# only brings in DQN and no algorithm registry is defined anywhere else.
from stable_baselines3 import DDPG, DQN, PPO, SAC, TD3

# Algorithm name -> SB3 class used to restore the downloaded checkpoint.
model_classes = {'PPO': PPO, 'DQN': DQN, 'DDPG': DDPG, 'SAC': SAC, 'TD3': TD3}
# Algorithms that run on the discrete-action environments.
discrete_algos = ('DQN', 'PPO')

run_post='_10'
exp_params['training']['DQN'] = eps_var['DQN3']

# Continue training every downloaded model that is not excluded.
for name, model_path in loadel_models.items():

    # Names missing from models_exclude are treated as "not excluded".
    if models_exclude is not None and models_exclude.get(name, False):
        continue
    print(name)

    exp_params['algorithm_name'] = name
    # Wall-clock derived seed (HHMMSS); it is logged AND applied to the model below.
    exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))

    uses_discrete_env = name in discrete_algos
    set_env = env_disc if uses_discrete_env else env

    # Attribute overrides applied by load() before it builds the model and
    # restores the saved weights.
    load_overrides = {'seed': exp_params['seed']}
    if name == 'DQN':
        # Change the exploration schedule through load() overrides.
        # Do NOT call model._setup_model() afterwards: it rebuilds the policy
        # network with fresh random weights and would throw the loaded
        # parameters away.
        eps_cfg = exp_params['training']['DQN']
        load_overrides.update(
            exploration_fraction=eps_cfg['fraction_eps'],
            exploration_initial_eps=eps_cfg['start_eps'],
            exploration_final_eps=eps_cfg['final_eps'],
        )

    model = model_classes[name].load(
        model_path + 'model.zip',
        env=set_env,
        device=device,
        **load_overrides,
    )

    art_loc, exp_name, run_id = mlflow_server.learn_and_fix(
                                model=model,
                                env= env_disc_render if uses_discrete_env else env_render,
                                run_name=name + run_post,
                                episode_count = exp_params['training']['episode_count'],
                                parameters=exp_params,
                                experiment_id=experiment_id,
                                checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
                                log_interval=exp_params['validation']['log_interval'])

In [None]:
# Delete the scratch directory holding the downloaded model artifacts.
shutil.rmtree(load_path)