In [1]:
import torch
import numpy as np
from stable_baselines3 import DQN

import os, shutil, sys
sys.path.append(os.path.abspath('./env'))
sys.path.append(os.path.abspath('./common'))

from env.env_move_sector_DQN import HumanMoveSectorAction
import common.mlflow_sb3_helper as mlf

In [2]:
from datetime import datetime
from pytz import timezone

# Project helper bound to the MLflow server at this address.
# NOTE(review): the meaning of the second positional argument (False) is defined in
# common.mlflow_sb3_helper and is not visible here.
mlflow_server = mlf.MLflowServerHelper("http://192.168.0.206:2670", False)

# Timezone used for the timestamps that go into experiment names and seeds.
TZ = timezone('Europe/Moscow')

# Maps the activation name stored in the experiment parameters to the torch module class.
activations = {
    'ReLU': torch.nn.ReLU,
    'Tanh': torch.nn.Tanh,
}

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Filled during training: run name -> MLflow artifact URI of the saved model.
mlflow_path = {}

In [3]:
# Environment instance handed to the agent (no render mode requested).
env = HumanMoveSectorAction(
    continuous=False,
    target_point_rand=False,
    object_ignore=True,
)

# Same flags plus render_mode='rgb_array'; this instance is the one passed
# to mlflow_server.learn_and_fix in the training cells.
env_render = HumanMoveSectorAction(
    continuous=False,
    target_point_rand=False,
    object_ignore=True,
    render_mode='rgb_array',
)

In [3]:
# Central configuration for one experiment. The whole dict is passed as `parameters`
# to mlflow_server.learn_and_fix, and the 'training' entries feed the DQN constructor.
exp_params = {
    "env_name": "",        # overwritten with env.name() before a run
    "algorithm_name": "",  # overwritten per run
    "exp_id": 0,           # overwritten with the MLflow experiment id
    # The timestamp is evaluated once, when this cell runs.
    "exp_name": f'exp_{datetime.now(TZ).strftime("%d%m_%H%M%S")}',
    "seed": 21,            # overwritten per run with a time-derived value
    "net": {
        "activation": "ReLU",
        "pi": [256, 256],
        "qf": [256, 256],
        "vf": [256, 256],
    },
    "training": {
        "iteration_count": 1,
        "episode_count": 59000,
        "policy": "MlpPolicy",
        "learning_rate": 0.003,
        "buffer_size": 1_500_000,
        "learning_starts": 100,
        "batch_size": 256,
        "tau": 0.005,
        "gamma": 0.99,
        "verbose": 0,
        "device": device,
        # Epsilon settings; replaced per run by an entry of eps_var and forwarded to DQN
        # as exploration_fraction / exploration_initial_eps / exploration_final_eps.
        "DQN": {
            "fraction_eps": 0.6,
            "start_eps": 1.0,
            "final_eps": 0.3,
        },
    },
    "validation": {
        "validate_agent_every_n_eps": 20000,  # forwarded as checkpoint_interval
        "log_interval": 10,
    },
    "evaluation": {
        "episode_count": 1,
    },
}

# Network settings in the keyword form expected by `policy_kwargs`.
# NOTE(review): in the visible cells this is never passed to DQN (the argument is
# commented out where the model is created) - confirm whether that is intended.
policy_kwargs = {
    "activation_fn": activations[exp_params["net"]["activation"]],
    "net_arch": {
        "pi": exp_params["net"]["pi"],
        "qf": exp_params["net"]["qf"],
        "vf": exp_params["net"]["vf"],
    },
}




In [12]:
# Epsilon schedules to sweep, keyed by run name. Each row below is
# (fraction_eps, start_eps, final_eps); dict order is the sweep order.
_EPS_FIELDS = ('fraction_eps', 'start_eps', 'final_eps')

eps_var = {
    run_label: dict(zip(_EPS_FIELDS, values))
    for run_label, values in (
        ('DQN1', (0.8, 1.0, 0.3)),
        ('DQN2', (0.4, 1.0, 0.3)),
        ('DQN3', (0.1, 1.0, 0.3)),
        ('DQN4', (0.1, 1.0, 0.1)),
        ('DQN5', (0.1, 0.3, 0.05)),
        ('DQN6', (0.0, 0.05, 0.05)),
    )
}

In [6]:
#!!!!! New experiment: register a fresh MLflow experiment named after the environment
exp_params['env_name'] = env.name()
# Builds 'env_<env name>_<exp name>'.
exp_name = '_'.join(('env', exp_params['env_name'], exp_params['exp_name']))
experiment_id = mlflow_server.new_experiment(exp_name)

2024/09/16 12:38:17 INFO mlflow.tracking.fluent: Experiment with name 'env_MoveSector_IgnoreObst_exp_1609_123811' does not exist. Creating a new experiment.


In [None]:
#!!!!! Continue an existing (old) experiment instead of creating a new one
exp_params['env_name'] = env.name()
# Hard-coded id of the experiment to continue; its name is looked up through the helper.
experiment_id = 161
exp_name = mlflow_server.get_experiment(experiment_id)
print(exp_name)

In [7]:
exp_params['exp_id'] = experiment_id

name = "DQN"

# Sweep the epsilon schedules in eps_var order. The first run trains a freshly built
# DQN; every later run continues from the model.zip that the previous run left in the
# local '<exp_name>/' folder, with that run's epsilon schedule applied.
for run_index, (eps_name, eps) in enumerate(eps_var.items()):
    print(eps_name)

    exp_params['algorithm_name'] = name
    # Time-of-day seed (HHMMSS). It is logged with the run and, unlike before, is now
    # also applied to the model so the logged value matches what was actually used.
    exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))
    exp_params['training']['DQN'] = eps
    training = exp_params['training']

    if run_index == 0:
        # NOTE(review): policy_kwargs is deliberately not passed (it was commented out),
        # so the policy's default architecture is used.
        model = DQN(
            training['policy'],
            env,
            learning_rate=training['learning_rate'],
            buffer_size=training['buffer_size'],
            learning_starts=training['learning_starts'],
            batch_size=training['batch_size'],
            tau=training['tau'],
            gamma=training['gamma'],
            verbose=training['verbose'],
            exploration_fraction=eps['fraction_eps'],
            exploration_initial_eps=eps['start_eps'],
            exploration_final_eps=eps['final_eps'],
            seed=exp_params['seed'],
            device=device,
        )
    else:
        # Override the exploration settings through the keyword arguments of load().
        # They are applied before the model is set up and before the saved weights are
        # restored. The previous approach (assigning the attributes and then calling the
        # private model._setup_model()) rebuilt the policy network and thereby threw
        # away the weights that had just been loaded.
        model = DQN.load(
            f'{exp_name}/model.zip',
            env=env,
            device=device,
            seed=exp_params['seed'],
            exploration_fraction=eps['fraction_eps'],
            exploration_initial_eps=eps['start_eps'],
            exploration_final_eps=eps['final_eps'],
        )

    # Train and log the run; exp_name returned here is the folder the next
    # iteration loads the model from.
    art_loc, exp_name, run_id = mlflow_server.learn_and_fix(
        model=model,
        env=env_render,
        run_name=eps_name,
        episode_count=training['episode_count'],
        parameters=exp_params,
        experiment_id=experiment_id,
        checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
        log_interval=exp_params['validation']['log_interval'],
    )
    # Remember where the trained model of this run is stored in MLflow.
    mlflow_path[eps_name] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'


DQN1


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4



                                                              

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4
DQN2


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4



                                                             

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4




DQN3


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4
DQN4


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4



                                                   

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4




In [8]:
# Display the run name -> MLflow model artifact URI mapping collected so far.
mlflow_path

{'DQN1': 'mlflow-artifacts:/196/8fa1cb8e397d455fa40384db60b91f6f/artifacts/env_MoveSector_IgnoreObst_exp_1609_123811/sb3/model.zip',
 'DQN2': 'mlflow-artifacts:/196/06fc8779fa144ba68aa077cef6b72591/artifacts/env_MoveSector_IgnoreObst_exp_1609_123811/sb3/model.zip',
 'DQN3': 'mlflow-artifacts:/196/4414df89b685485485d981e9507c0e23/artifacts/env_MoveSector_IgnoreObst_exp_1609_123811/sb3/model.zip',
 'DQN4': 'mlflow-artifacts:/196/57023a6c73994afeaef121ec06825195/artifacts/env_MoveSector_IgnoreObst_exp_1609_123811/sb3/model.zip'}

In [4]:
# Hard-coded MLflow experiment id used by the following cells.
experiment_id = 196

In [22]:
# Manually selected model to continue from: run name -> MLflow artifact URI of model.zip.
# This replaces whatever mapping was collected during training.
mlflow_path = {  'DQN': 'mlflow-artifacts:/196/22d34f607bab43edbdef011992b9ff95/artifacts/env_MoveSector_IgnoreObst_exp_1609_123811/sb3/model.zip' }

In [6]:

# Local scratch directory for downloaded artifacts, named after the experiment id.
load_path = str(experiment_id)
# exist_ok=True keeps this cell re-runnable: os.mkdir raised FileExistsError
# whenever the directory was left over from a previous run.
os.makedirs(load_path, exist_ok=True)
load_path

'196'

In [23]:

# Run name -> local folder that receives the downloaded model artifact.
loadel_models = {}

for model_name, path in mlflow_path.items():
    local_dir = f'{load_path}/{model_name}/'
    # Record the destination first, then fetch the artifact into it.
    loadel_models[model_name] = local_dir
    mlflow_server.load_artifact(path, local_dir)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [24]:
# Re-create both environment instances for the continuation runs.
env = HumanMoveSectorAction(
    continuous=False,
    target_point_rand=False,
    object_ignore=True,
)
# Same flags plus render_mode='rgb_array'.
env_render = HumanMoveSectorAction(
    continuous=False,
    target_point_rand=False,
    object_ignore=True,
    render_mode='rgb_array',
)

# Show the environment name that goes into the experiment name.
env.name()

'MoveSector_IgnoreObst'

In [25]:
#!!! Continue training in a NEW experiment
exp_params['env_name'] = env.name()
# Builds 'env_<env name>_<exp name>'.
exp_name = '_'.join(('env', exp_params['env_name'], exp_params['exp_name']))
experiment_id = mlflow_server.new_experiment(exp_name)
exp_params['exp_id'] = experiment_id
print(exp_name)


2024/09/17 09:42:37 INFO mlflow.tracking.fluent: Experiment with name 'env_MoveSector_IgnoreObst_exp_1709_073228' does not exist. Creating a new experiment.


env_MoveSector_IgnoreObst_exp_1709_073228


In [18]:
#!!! Continue training in the OLD experiment: look up its name from the current experiment_id
exp_name = mlflow_server.get_experiment(experiment_id)
print(exp_name)

env_MoveSector_IgnoreObst_exp_1609_123811


In [19]:
# Continue training every downloaded model with the 'DQN5' epsilon schedule.
for name, model_path in loadel_models.items():

    print(name)

    exp_params['algorithm_name'] = name
    # Time-of-day seed (HHMMSS); logged with the run and applied to the loaded model.
    exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))
    exp_params['training']['DQN'] = eps_var['DQN5']
    eps_cfg = exp_params['training']['DQN']

    # Exploration overrides go through load()'s keyword arguments so they take effect
    # before the saved weights are restored. The previous code assigned the attributes
    # and then called the private model._setup_model(), which rebuilt the policy
    # network and discarded the loaded weights.
    model = DQN.load(
        model_path + 'model.zip',
        env=env,
        device=device,
        seed=exp_params['seed'],
        exploration_fraction=eps_cfg['fraction_eps'],
        exploration_initial_eps=eps_cfg['start_eps'],
        exploration_final_eps=eps_cfg['final_eps'],
    )

    # NOTE(review): the run is named 'DQN6' while the schedule comes from
    # eps_var['DQN5'] - confirm which of the two is intended.
    art_loc, exp_name, run_id = mlflow_server.learn_and_fix(
        model=model,
        env=env_render,
        run_name='DQN6',
        episode_count=exp_params['training']['episode_count'],
        parameters=exp_params,
        experiment_id=experiment_id,
        checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
        log_interval=exp_params['validation']['log_interval'],
    )
    
    

DQN5


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1609_123811//agent.mp4


In [26]:
# Continue the downloaded 'DQN' model for ten consecutive runs, each one starting
# from the model.zip written by the previous run.
name = 'DQN'
model_path = loadel_models[name]
exp_params['algorithm_name'] = name
# Time-of-day seed (HHMMSS); logged with every run below and applied to the model.
exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))
exp_params['training']['DQN'] = eps_var['DQN5']
eps_cfg = exp_params['training']['DQN']

# Exploration overrides go through load()'s keyword arguments so they take effect
# before the saved weights are restored. The previous code assigned the attributes and
# then called the private model._setup_model(), which rebuilt the policy network and
# discarded the loaded weights.
model = DQN.load(
    model_path + 'model.zip',
    env=env,
    device=device,
    seed=exp_params['seed'],
    exploration_fraction=eps_cfg['fraction_eps'],
    exploration_initial_eps=eps_cfg['start_eps'],
    exploration_final_eps=eps_cfg['final_eps'],
)

for i in range(10):

    print(i)

    art_loc, exp_name, run_id = mlflow_server.learn_and_fix(
        model=model,
        env=env_render,
        run_name=f'{name}_{i}',
        episode_count=exp_params['training']['episode_count'],
        parameters=exp_params,
        experiment_id=experiment_id,
        checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
        log_interval=exp_params['validation']['log_interval'],
    )

    # Reload the model this run saved locally; it is the starting point of the next run.
    model = DQN.load(f'{exp_name}/model.zip', env=env, device=device)

0


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                   

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4




1


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                              

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4
2


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4
3


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                              

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4
4


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                              

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4
5


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4
6


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                              

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4
7


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4
8


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                               

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4
9


Output()

Moviepy - Building video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4.
Moviepy - Writing video env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4



                                                              

Moviepy - Done !
Moviepy - video ready env_MoveSector_IgnoreObst_exp_1709_073228//agent.mp4


In [11]:
# Remove the local scratch directory holding the downloaded artifacts.
shutil.rmtree(load_path)