In [1]:
import torch
import numpy as np
from stable_baselines3 import SAC


import os, shutil, sys
sys.path.append(os.path.abspath('./env'))
sys.path.append(os.path.abspath('./common'))

from env.env_move_sector  import HumanMoveSectorAction
import common.mlflow_sb3_helper as mlf

from pytz import timezone
from datetime import datetime

# Time zone applied when building timestamped experiment names and seeds.
TZ = timezone('Europe/Moscow')

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [2]:
# Central configuration for the experiment. The complete dict is passed to
# mlflow_server.learn_and_fix() as `parameters` in the training cell.
exp_params = {
    # Placeholders overwritten by later cells (environment name, algorithm
    # name and MLflow experiment id).
    'env_name': '',
    'algorithm_name': '',
    'exp_id': 0,
    # Timestamped suffix (day, month, time in TZ) used in the experiment name.
    'exp_name': f'exp_{datetime.now(TZ).strftime("%d%m_%H%M%S")}',
    # Initial value only; the training loop replaces it with a time-derived seed.
    'seed': 21,
    # Network description: activation name and layer widths.
    # NOTE(review): presumably pi/qf/vf are the policy, Q-function and
    # value-function layer sizes -- confirm against the helper module.
    'net': {
        'activation': 'ReLU',
        'pi': [256, 256],
        'qf': [256, 256],
        'vf': [256, 256],
    },
    # Training settings. In the cells shown here only 'episode_count' is read
    # directly (forwarded to learn_and_fix); the rest travel with the dict.
    'training': {
        'iteration_count': 1,
        'episode_count': 39_000,
        'policy': 'MlpPolicy',
        'learning_rate': 0.001,
        'buffer_size': 1_500_000,
        'learning_starts': 100,
        'batch_size': 256,
        'tau': 0.001,
        'gamma': 0.95,
        'verbose': 0,
        'device': device,
    },
    # Environment options; not read by any cell visible in this notebook.
    'options': {
        'finish_dist': 100,
        'start_dist': 110,
        'delta_phi': 0,
    },
    # Forwarded to learn_and_fix as checkpoint_interval and log_interval.
    'validation': {
        'validate_agent_every_n_eps': 20_000,
        'log_interval': 10,
    },
    # 'episode_count' here sets how many runs the training loop performs.
    'evaluation': {
        'episode_count': 4,
    },
}


In [3]:

# Artifact URIs keyed by algorithm name; filled in by the next cell.
mlflow_path = {}

# Helper object for the MLflow tracking server; later cells use it to
# download artifacts, create/look up experiments and run training.
# NOTE(review): the meaning of the second positional argument (False) is
# defined in common.mlflow_sb3_helper -- confirm before changing it.
mlflow_server = mlf.MLflowServerHelper("http://192.168.0.206:2670", False)

In [4]:
# Id of the existing MLflow experiment that holds the pre-trained checkpoint.
# It is also used below as the name of the local download directory.
experiment_id = 378

# Artifact URI of the saved SB3 model archive, keyed by algorithm name.
mlflow_path = {
    'SAC': f'mlflow-artifacts:/{experiment_id}/58155bd8c7fc42d3a13a0c7c9e897056/artifacts/env_MoveSector_RanTP_IgnoreObst_exp_1410_193855/sb3/model.zip',
}

In [5]:

# Local directory for downloaded checkpoints, named after the source
# experiment id (os.path.join with a single argument was a no-op, so the
# plain string is used directly).
load_path = str(experiment_id)

# exist_ok=True creates the directory only when it is missing, replacing the
# separate isdir() check so that re-running the cell is always safe.
os.makedirs(load_path, exist_ok=True)

# Show the directory name as the cell output.
load_path

'378'

In [6]:
# Download every registered checkpoint and remember, per algorithm name,
# the local directory it was downloaded into.
loadel_models = {}

for model_name, path in mlflow_path.items():
    # Each algorithm gets its own sub-directory under load_path.
    target_dir = f'{load_path}/{model_name}/'
    loadel_models[model_name] = target_dir
    mlflow_server.load_artifact(path, target_dir)

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

In [7]:
# Settings shared by both environment instances.
env_kwargs = {'target_point_rand': True, 'object_ignore': False}

# Environment attached to the model when the checkpoint is loaded.
env = HumanMoveSectorAction(**env_kwargs)
# Same configuration with RGB-array rendering; this instance is the one
# passed to learn_and_fix in the training cell.
env_render = HumanMoveSectorAction(**env_kwargs, render_mode='rgb_array')

In [8]:
# Register a NEW MLflow experiment named after the environment and the
# timestamped run suffix, and record its id in the parameters.
exp_params['env_name'] = env.name()
exp_name = 'env_' + exp_params['env_name'] + '_' + exp_params['exp_name']

# From here on experiment_id refers to the new experiment, not the one the
# checkpoint was downloaded from.
exp_params['exp_id'] = experiment_id = mlflow_server.new_experiment(exp_name)

print(exp_name)

2024/10/15 11:26:45 INFO mlflow.tracking.fluent: Experiment with name 'env_MoveSector_RanTP_exp_1510_112644' does not exist. Creating a new experiment.


env_MoveSector_RanTP_exp_1510_112644


In [None]:
# !!! Continue training inside the OLD (already existing) experiment.
# NOTE(review): this cell looks like an alternative to the cell that creates
# a new experiment -- it reuses the current experiment_id instead.
exp_name = mlflow_server.get_experiment(experiment_id)
print(exp_name)

# Record the experiment id and the environment name in the parameters.
exp_params.update({
    'exp_id': experiment_id,
    'env_name': env.name(),
})

In [9]:

# Fine-tune the downloaded SAC checkpoint over several consecutive MLflow runs.
exp_params['algorithm_name'] = 'SAC'
name = 'SAC'

# Local directory the SAC checkpoint was downloaded into.
path = loadel_models['SAC']
print(path)

# NOTE(review): the number of runs is taken from the 'evaluation' section,
# not from training.iteration_count -- confirm this is intended.
for i in range(exp_params['evaluation']['episode_count']):
    # Fresh time-derived seed (HHMMSS in TZ) stored with the run parameters.
    # NOTE(review): the seed is only written to exp_params here; whether
    # learn_and_fix applies it to the model is not visible in this notebook.
    exp_params['seed'] = int(datetime.now(TZ).strftime("%H%M%S"))

    # The first run starts from the downloaded checkpoint; every later run
    # resumes from the directory returned by the previous learn_and_fix call.
    checkpoint_dir = path if i == 0 else exp_name
    model = SAC.load(f'{checkpoint_dir}/model.zip', env=env, device=device)

    # Train the model and log the run; the returned exp_name is used as the
    # checkpoint directory of the next iteration.
    art_loc, exp_name, run_id = mlflow_server.learn_and_fix(
        model=model,
        env=env_render,
        run_name=f'{name}_{i+1}',
        episode_count=exp_params['training']['episode_count'],
        parameters=exp_params,
        experiment_id=experiment_id,
        checkpoint_interval=exp_params['validation']['validate_agent_every_n_eps'],
        log_interval=exp_params['validation']['log_interval'],
    )

    # Remember the artifact URI of the most recent run for this algorithm.
    mlflow_path[name] = f'{art_loc}/{run_id}/artifacts/{exp_name}/sb3/model.zip'



378/SAC/


Output()

Moviepy - Building video env_MoveSector_RanTP_exp_1510_112644//agent.mp4.
Moviepy - Writing video env_MoveSector_RanTP_exp_1510_112644//agent.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready env_MoveSector_RanTP_exp_1510_112644//agent.mp4


Output()

Moviepy - Building video env_MoveSector_RanTP_exp_1510_112644//agent.mp4.
Moviepy - Writing video env_MoveSector_RanTP_exp_1510_112644//agent.mp4



                                                                 

Moviepy - Done !
Moviepy - video ready env_MoveSector_RanTP_exp_1510_112644//agent.mp4


Output()

In [None]:
# Display the current model artifact URI per algorithm; the training loop
# overwrites the entry after every run.
mlflow_path

In [None]:
# Delete the local download directory together with the checkpoints in it.
shutil.rmtree(load_path)